* [PATCH] drm/i915: Make the physical object coherent with GTT
@ 2014-07-10 20:53 Chris Wilson
  2014-08-08  9:25 ` Daniel Vetter
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2014-07-10 20:53 UTC (permalink / raw)
  To: intel-gfx; +Cc: daniel.vetter

Currently, objects for which the hardware needs a contiguous physical
address are allocated shadow backing storage to satisfy the constraint.
This shadow buffer is not wired into the normal obj->pages, and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object either via a GTT mmapping of, or by
rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
backing storage coherent with the contiguous shadow is not yet possible.
Fortuitously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This makes the physical constraint of the GEM object transparent to
userspace, allowing it to efficiently render into or update such objects
via the GTT and GPU.

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.
v4: Wait for rendering before pwrite. As this patch makes it possible to
render into the phys object, we should make it correct as well!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |   3 +
 drivers/gpu/drm/i915/i915_drv.h |   6 +-
 drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++-------------
 include/uapi/drm/i915_drm.h     |   1 +
 4 files changed, 150 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 5ae608121f03..1b9798503b77 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version();
 		break;
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 211db0653831..f81d787d6b47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1787,10 +1787,10 @@ struct drm_i915_gem_object {
 	unsigned long user_pin_count;
 	struct drm_file *pin_filp;
 
-	/** for phy allocated objects */
-	drm_dma_handle_t *phys_handle;
-
 	union {
+		/** for phy allocated objects */
+		drm_dma_handle_t *phys_handle;
+
 		struct i915_gem_userptr {
 			uintptr_t ptr;
 			unsigned read_only :1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 83ccbabdcacf..d083cf5bdbbd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	drm_dma_handle_t *phys = obj->phys_handle;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	char *vaddr = obj->phys_handle->vaddr;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i;
 
-	if (!phys)
-		return;
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		page_cache_release(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(obj->base.dev);
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
 
-	if (obj->madv == I915_MADV_WILLNEED) {
+	sg_dma_address(sg) = obj->phys_handle->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->pages = st;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	if (obj->madv == I915_MADV_DONTNEED)
+		obj->dirty = 0;
+
+	if (obj->dirty) {
 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-		char *vaddr = phys->vaddr;
+		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page = shmem_read_mapping_page(mapping, i);
-			if (!IS_ERR(page)) {
-				char *dst = kmap_atomic(page);
-				memcpy(dst, vaddr, PAGE_SIZE);
-				drm_clflush_virt_range(dst, PAGE_SIZE);
-				kunmap_atomic(dst);
-
-				set_page_dirty(page);
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-				page_cache_release(page);
-			}
+			page_cache_release(page);
 			vaddr += PAGE_SIZE;
 		}
-		i915_gem_chipset_flush(obj->base.dev);
+		obj->dirty = 0;
 	}
 
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-	drm_pci_free(obj->base.dev, phys);
-	obj->phys_handle = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	obj->has_dma_mapping = false;
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma, *next;
+	int ret;
+
+	drm_gem_object_reference(&obj->base);
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_put_pages(obj);
+	drm_gem_object_unreference(&obj->base);
+
+	return ret;
 }
 
 int
@@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
 {
 	drm_dma_handle_t *phys;
-	struct address_space *mapping;
-	char *vaddr;
-	int i;
+	int ret;
 
 	if (obj->phys_handle) {
 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (obj->base.filp == NULL)
 		return -EINVAL;
 
+	ret = drop_pages(obj);
+	if (ret)
+		return ret;
+
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 	if (!phys)
 		return -ENOMEM;
 
-	vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-#ifdef CONFIG_X86
-			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-			drm_pci_free(obj->base.dev, phys);
-			return PTR_ERR(page);
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		vaddr += PAGE_SIZE;
-	}
-
 	obj->phys_handle = phys;
-	return 0;
+	obj->ops = &i915_gem_phys_ops;
+
+	return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -312,6 +385,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = to_user_ptr(args->data_ptr);
+	int ret;
+
+	/* We manually control the domain here and pretend that it
+	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
+	 */
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
 
 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
 		unsigned long unwritten;
@@ -327,6 +408,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 			return -EFAULT;
 	}
 
+	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -1047,11 +1129,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_handle) {
-		ret = i915_gem_phys_pwrite(obj, args, file);
-		goto out;
-	}
-
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {
@@ -1061,8 +1138,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC)
-		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT || ret == -ENOSPC) {
+		if (obj->phys_handle)
+			ret = i915_gem_phys_pwrite(obj, args, file);
+		else
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -3499,7 +3580,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
-	if (obj->stolen)
+	if (obj->stolen || obj->phys_handle)
 		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
@@ -4442,8 +4523,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		}
 	}
 
-	i915_gem_object_detach_phys(obj);
-
 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
 	 * before progressing. */
 	if (obj->stolen)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ff57f07c3249..c6b229fe4a11 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
2.0.1
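
For reference, a minimal userspace sketch (not part of the patch) showing how
a client might probe the new parameter before relying on GTT-coherent phys
objects. It assumes libdrm (drmIoctl() from xf86drm.h) and an already-opened
DRM fd:

#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int has_coherent_phys_gtt(int fd)
{
	drm_i915_getparam_t gp;
	int value = 0;

	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT; /* 29, added above */
	gp.value = &value;

	/* Older kernels reject unknown parameters, so treat failure as 0. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return value;
}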



* Re: [PATCH] drm/i915: Make the physical object coherent with GTT
  2014-07-10 20:53 [PATCH] drm/i915: Make the physical object coherent with GTT Chris Wilson
@ 2014-08-08  9:25 ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2014-08-08  9:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: daniel.vetter, intel-gfx

On Thu, Jul 10, 2014 at 09:53:43PM +0100, Chris Wilson wrote:
> Currently, objects for which the hardware needs a contiguous physical
> address are allocated shadow backing storage to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages, and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of, or by
> rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This makes the physical constraint of the GEM object transparent to
> userspace, allowing it to efficiently render into or update such objects
> via the GTT and GPU.
> 
> v2: Fix leak of pci handle spotted by Ville
> v3: Remove the now duplicate call to detach_phys_object during free.
> v4: Wait for rendering before pwrite. As this patch makes it possible to
> render into the phys object, we should make it correct as well!
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

Ok, I guess you can make a case that this fixes a bug, but then the commit
message should talk about that instead of only mentioning phys object
coherency (which is kinda just a side-effect of the rework to track phys
objects in the sg tables like everything else).

Plus mention that kms_cursor_crc provokes this if people bother to plug
in a VGA connector or something.

With that I'll pull it into -fixes with a cc: stable, without requiring a
testcase for the coherency part.
-Daniel


> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>  drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  4 files changed, 150 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 5ae608121f03..1b9798503b77 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_CMD_PARSER_VERSION:
>  		value = i915_cmd_parser_get_version();
>  		break;
> +	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 211db0653831..f81d787d6b47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1787,10 +1787,10 @@ struct drm_i915_gem_object {
>  	unsigned long user_pin_count;
>  	struct drm_file *pin_filp;
>  
> -	/** for phy allocated objects */
> -	drm_dma_handle_t *phys_handle;
> -
>  	union {
> +		/** for phy allocated objects */
> +		drm_dma_handle_t *phys_handle;
> +
>  		struct i915_gem_userptr {
>  			uintptr_t ptr;
>  			unsigned read_only :1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 83ccbabdcacf..d083cf5bdbbd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -	drm_dma_handle_t *phys = obj->phys_handle;
> +	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +	char *vaddr = obj->phys_handle->vaddr;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int i;
>  
> -	if (!phys)
> -		return;
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		page_cache_release(page);
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	i915_gem_chipset_flush(obj->base.dev);
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = 0;
> +	sg->length = obj->base.size;
>  
> -	if (obj->madv == I915_MADV_WILLNEED) {
> +	sg_dma_address(sg) = obj->phys_handle->busaddr;
> +	sg_dma_len(sg) = obj->base.size;
> +
> +	obj->pages = st;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		WARN_ON(ret != -EIO);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
> +	if (obj->madv == I915_MADV_DONTNEED)
> +		obj->dirty = 0;
> +
> +	if (obj->dirty) {
>  		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -		char *vaddr = phys->vaddr;
> +		char *vaddr = obj->phys_handle->vaddr;
>  		int i;
>  
>  		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -			struct page *page = shmem_read_mapping_page(mapping, i);
> -			if (!IS_ERR(page)) {
> -				char *dst = kmap_atomic(page);
> -				memcpy(dst, vaddr, PAGE_SIZE);
> -				drm_clflush_virt_range(dst, PAGE_SIZE);
> -				kunmap_atomic(dst);
> -
> -				set_page_dirty(page);
> +			struct page *page;
> +			char *dst;
> +
> +			page = shmem_read_mapping_page(mapping, i);
> +			if (IS_ERR(page))
> +				continue;
> +
> +			dst = kmap_atomic(page);
> +			drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +			memcpy(dst, vaddr, PAGE_SIZE);
> +			kunmap_atomic(dst);
> +
> +			set_page_dirty(page);
> +			if (obj->madv == I915_MADV_WILLNEED)
>  				mark_page_accessed(page);
> -				page_cache_release(page);
> -			}
> +			page_cache_release(page);
>  			vaddr += PAGE_SIZE;
>  		}
> -		i915_gem_chipset_flush(obj->base.dev);
> +		obj->dirty = 0;
>  	}
>  
> -#ifdef CONFIG_X86
> -	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	drm_pci_free(obj->base.dev, phys);
> -	obj->phys_handle = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	obj->has_dma_mapping = false;
> +}
> +
> +static void
> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
> +{
> +	drm_pci_free(obj->base.dev, obj->phys_handle);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +	.get_pages = i915_gem_object_get_pages_phys,
> +	.put_pages = i915_gem_object_put_pages_phys,
> +	.release = i915_gem_object_release_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma, *next;
> +	int ret;
> +
> +	drm_gem_object_reference(&obj->base);
> +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_put_pages(obj);
> +	drm_gem_object_unreference(&obj->base);
> +
> +	return ret;
>  }
>  
>  int
> @@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
>  {
>  	drm_dma_handle_t *phys;
> -	struct address_space *mapping;
> -	char *vaddr;
> -	int i;
> +	int ret;
>  
>  	if (obj->phys_handle) {
>  		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  	if (obj->base.filp == NULL)
>  		return -EINVAL;
>  
> +	ret = drop_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	/* create a new object */
>  	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>  	if (!phys)
>  		return -ENOMEM;
>  
> -	vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -		struct page *page;
> -		char *src;
> -
> -		page = shmem_read_mapping_page(mapping, i);
> -		if (IS_ERR(page)) {
> -#ifdef CONFIG_X86
> -			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -			drm_pci_free(obj->base.dev, phys);
> -			return PTR_ERR(page);
> -		}
> -
> -		src = kmap_atomic(page);
> -		memcpy(vaddr, src, PAGE_SIZE);
> -		kunmap_atomic(src);
> -
> -		mark_page_accessed(page);
> -		page_cache_release(page);
> -
> -		vaddr += PAGE_SIZE;
> -	}
> -
>  	obj->phys_handle = phys;
> -	return 0;
> +	obj->ops = &i915_gem_phys_ops;
> +
> +	return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -312,6 +385,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  	struct drm_device *dev = obj->base.dev;
>  	void *vaddr = obj->phys_handle->vaddr + args->offset;
>  	char __user *user_data = to_user_ptr(args->data_ptr);
> +	int ret;
> +
> +	/* We manually control the domain here and pretend that it
> +	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
> +	 */
> +	ret = i915_gem_object_wait_rendering(obj, false);
> +	if (ret)
> +		return ret;
>  
>  	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
>  		unsigned long unwritten;
> @@ -327,6 +408,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  			return -EFAULT;
>  	}
>  
> +	drm_clflush_virt_range(vaddr, args->size);
>  	i915_gem_chipset_flush(dev);
>  	return 0;
>  }
> @@ -1047,11 +1129,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 * pread/pwrite currently are reading and writing from the CPU
>  	 * perspective, requiring manual detiling by the client.
>  	 */
> -	if (obj->phys_handle) {
> -		ret = i915_gem_phys_pwrite(obj, args, file);
> -		goto out;
> -	}
> -
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
>  	    cpu_write_needs_clflush(obj)) {
> @@ -1061,8 +1138,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		 * textures). Fallback to the shmem path in that case. */
>  	}
>  
> -	if (ret == -EFAULT || ret == -ENOSPC)
> -		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	if (ret == -EFAULT || ret == -ENOSPC) {
> +		if (obj->phys_handle)
> +			ret = i915_gem_phys_pwrite(obj, args, file);
> +		else
> +			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	}
>  
>  out:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3499,7 +3580,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
> -	if (obj->stolen)
> +	if (obj->stolen || obj->phys_handle)
>  		return false;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
> @@ -4442,8 +4523,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		}
>  	}
>  
> -	i915_gem_object_detach_phys(obj);
> -
>  	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>  	 * before progressing. */
>  	if (obj->stolen)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ff57f07c3249..c6b229fe4a11 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 2.0.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


* Re: [PATCH] drm/i915: Make the physical object coherent with GTT
  2014-06-11 11:59   ` Chris Wilson
@ 2014-07-10 20:59     ` Daniel Vetter
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Vetter @ 2014-07-10 20:59 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jun 11, 2014 at 12:59:51PM +0100, Chris Wilson wrote:
> Currently, objects for which the hardware needs a contiguous physical
> address are allocated shadow backing storage to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages, and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of, or by
> rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This makes the physical constraint of the GEM object transparent to
> userspace, allowing it to efficiently render into or update such objects
> via the GTT and GPU.
> 
> v2: Fix leak of pci handle spotted by Ville
> v3: Remove the now duplicate call to detach_phys_object during free.
> v4: Wait for rendering before pwrite. As this patch makes it possible to
> render into the phys object, we should make it correct as well!
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

So for the record I'd like to see a test for the wait_rendering in
phys_pwrite:

a) show black screen with black cursor
b) throw busy load on blitter
c) upload white cursor with blitter
d) pwrite black cursor
e) wait for blitter to finish
grab crc for a&e and compare

-Daniel
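
A rough sketch of that flow; every helper below (create_cursor_bo(),
collect_crc(), and friends) is a hypothetical placeholder for the IGT
plumbing, not real IGT API:

/* Hypothetical sketch of the test described above; all helper names
 * are placeholders, not actual IGT functions. */
static void test_phys_pwrite_waits_for_blitter(int fd)
{
	uint32_t cursor_bo = create_cursor_bo(fd);	/* hypothetical */
	uint32_t crc_a, crc_e;

	show_black_fb_with_cursor(fd, cursor_bo);	/* (a) black on black */
	crc_a = collect_crc(fd);

	keep_blitter_busy(fd);				/* (b) busy load on blitter */
	blit_white_cursor(fd, cursor_bo);		/* (c) queued on the blitter */
	pwrite_black_cursor(fd, cursor_bo);		/* (d) must not overtake (c) */

	wait_blitter_idle(fd);				/* (e) */
	crc_e = collect_crc(fd);

	/* If phys pwrite waited for rendering, (d) landed after (c), the
	 * cursor is black again, and the CRC matches (a). */
	assert_crcs_equal(crc_a, crc_e);
}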

> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>  drivers/gpu/drm/i915/i915_gem.c | 206 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  4 files changed, 149 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 59e8ffe230a7..b1f072039865 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_CMD_PARSER_VERSION:
>  		value = i915_cmd_parser_get_version();
>  		break;
> +	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0a9d1b85d529..c80ddcf8aa6f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1715,10 +1715,10 @@ struct drm_i915_gem_object {
>  	unsigned long user_pin_count;
>  	struct drm_file *pin_filp;
>  
> -	/** for phy allocated objects */
> -	drm_dma_handle_t *phys_handle;
> -
>  	union {
> +		/** for phy allocated objects */
> +		drm_dma_handle_t *phys_handle;
> +
>  		struct i915_gem_userptr {
>  			uintptr_t ptr;
>  			unsigned read_only :1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 25b5063388b2..f9e158261c42 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -	drm_dma_handle_t *phys = obj->phys_handle;
> +	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +	char *vaddr = obj->phys_handle->vaddr;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int i;
>  
> -	if (!phys)
> -		return;
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		page_cache_release(page);
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	i915_gem_chipset_flush(obj->base.dev);
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = 0;
> +	sg->length = obj->base.size;
>  
> -	if (obj->madv == I915_MADV_WILLNEED) {
> +	sg_dma_address(sg) = obj->phys_handle->busaddr;
> +	sg_dma_len(sg) = obj->base.size;
> +
> +	obj->pages = st;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		WARN_ON(ret != -EIO);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
> +	if (obj->madv == I915_MADV_DONTNEED)
> +		obj->dirty = 0;
> +
> +	if (obj->dirty) {
>  		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -		char *vaddr = phys->vaddr;
> +		char *vaddr = obj->phys_handle->vaddr;
>  		int i;
>  
>  		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -			struct page *page = shmem_read_mapping_page(mapping, i);
> -			if (!IS_ERR(page)) {
> -				char *dst = kmap_atomic(page);
> -				memcpy(dst, vaddr, PAGE_SIZE);
> -				drm_clflush_virt_range(dst, PAGE_SIZE);
> -				kunmap_atomic(dst);
> -
> -				set_page_dirty(page);
> +			struct page *page;
> +			char *dst;
> +
> +			page = shmem_read_mapping_page(mapping, i);
> +			if (IS_ERR(page))
> +				continue;
> +
> +			dst = kmap_atomic(page);
> +			drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +			memcpy(dst, vaddr, PAGE_SIZE);
> +			kunmap_atomic(dst);
> +
> +			set_page_dirty(page);
> +			if (obj->madv == I915_MADV_WILLNEED)
>  				mark_page_accessed(page);
> -				page_cache_release(page);
> -			}
> +			page_cache_release(page);
>  			vaddr += PAGE_SIZE;
>  		}
> -		i915_gem_chipset_flush(obj->base.dev);
> +		obj->dirty = 0;
>  	}
>  
> -#ifdef CONFIG_X86
> -	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	drm_pci_free(obj->base.dev, phys);
> -	obj->phys_handle = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	obj->has_dma_mapping = false;
> +}
> +
> +static void
> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
> +{
> +	drm_pci_free(obj->base.dev, obj->phys_handle);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +	.get_pages = i915_gem_object_get_pages_phys,
> +	.put_pages = i915_gem_object_put_pages_phys,
> +	.release = i915_gem_object_release_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma, *next;
> +	int ret;
> +
> +	drm_gem_object_reference(&obj->base);
> +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_put_pages(obj);
> +	drm_gem_object_unreference(&obj->base);
> +
> +	return ret;
>  }
>  
>  int
> @@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
>  {
>  	drm_dma_handle_t *phys;
> -	struct address_space *mapping;
> -	char *vaddr;
> -	int i;
> +	int ret;
>  
>  	if (obj->phys_handle) {
>  		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  	if (obj->base.filp == NULL)
>  		return -EINVAL;
>  
> +	ret = drop_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	/* create a new object */
>  	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>  	if (!phys)
>  		return -ENOMEM;
>  
> -	vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -		struct page *page;
> -		char *src;
> -
> -		page = shmem_read_mapping_page(mapping, i);
> -		if (IS_ERR(page)) {
> -#ifdef CONFIG_X86
> -			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -			drm_pci_free(obj->base.dev, phys);
> -			return PTR_ERR(page);
> -		}
> -
> -		src = kmap_atomic(page);
> -		memcpy(vaddr, src, PAGE_SIZE);
> -		kunmap_atomic(src);
> -
> -		mark_page_accessed(page);
> -		page_cache_release(page);
> -
> -		vaddr += PAGE_SIZE;
> -	}
> -
>  	obj->phys_handle = phys;
> -	return 0;
> +	obj->ops = &i915_gem_phys_ops;
> +
> +	return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -313,6 +386,13 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  	void *vaddr = obj->phys_handle->vaddr + args->offset;
>  	char __user *user_data = to_user_ptr(args->data_ptr);
>  
> +	/* We manually control the domain here and pretend that it
> +	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
> +	 */
> +	ret = i915_gem_object_wait_rendering(obj, false);
> +	if (ret)
> +		return ret;
> +
>  	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
>  		unsigned long unwritten;
>  
> @@ -327,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  			return -EFAULT;
>  	}
>  
> +	drm_clflush_virt_range(vaddr, args->size);
>  	i915_gem_chipset_flush(dev);
>  	return 0;
>  }
> @@ -1190,11 +1271,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 * pread/pwrite currently are reading and writing from the CPU
>  	 * perspective, requiring manual detiling by the client.
>  	 */
> -	if (obj->phys_handle) {
> -		ret = i915_gem_phys_pwrite(obj, args, file);
> -		goto out;
> -	}
> -
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    (obj->base.filp == NULL ||
>  	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> @@ -1205,8 +1281,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		 * textures). Fallback to the shmem path in that case. */
>  	}
>  
> -	if (ret == -EFAULT || ret == -ENOSPC)
> -		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	if (ret == -EFAULT || ret == -ENOSPC) {
> +		if (obj->phys_handle)
> +			ret = i915_gem_phys_pwrite(obj, args, file);
> +		else
> +			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	}
>  
>  out:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3646,7 +3726,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
> -	if (obj->stolen)
> +	if (obj->stolen || obj->phys_handle)
>  		return false;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
> @@ -4579,8 +4659,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		}
>  	}
>  
> -	i915_gem_object_detach_phys(obj);
> -
>  	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>  	 * before progressing. */
>  	if (obj->stolen)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index b6498a43497e..50d905483d8b 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 2.0.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


* [PATCH] drm/i915: Make the physical object coherent with GTT
  2014-06-11 11:40 ` Ville Syrjälä
  2014-06-11 11:51   ` Chris Wilson
@ 2014-06-11 11:59   ` Chris Wilson
  2014-07-10 20:59     ` Daniel Vetter
  1 sibling, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2014-06-11 11:59 UTC (permalink / raw)
  To: intel-gfx

Currently, objects for which the hardware needs a contiguous physical
address are allocated shadow backing storage to satisfy the constraint.
This shadow buffer is not wired into the normal obj->pages, and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object either via a GTT mmapping of, or by
rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
backing storage coherent with the contiguous shadow is not yet possible.
Fortuitously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This makes the physical constraint of the GEM object transparent to
userspace, allowing it to efficiently render into or update such objects
via the GTT and GPU.

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.
v4: Wait for rendering before pwrite. As this patch makes it possible to
render into the phys object, we should make it correct as well!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |   3 +
 drivers/gpu/drm/i915/i915_drv.h |   6 +-
 drivers/gpu/drm/i915/i915_gem.c | 206 +++++++++++++++++++++++++++-------------
 include/uapi/drm/i915_drm.h     |   1 +
 4 files changed, 149 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 59e8ffe230a7..b1f072039865 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version();
 		break;
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a9d1b85d529..c80ddcf8aa6f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1715,10 +1715,10 @@ struct drm_i915_gem_object {
 	unsigned long user_pin_count;
 	struct drm_file *pin_filp;
 
-	/** for phy allocated objects */
-	drm_dma_handle_t *phys_handle;
-
 	union {
+		/** for phy allocated objects */
+		drm_dma_handle_t *phys_handle;
+
 		struct i915_gem_userptr {
 			uintptr_t ptr;
 			unsigned read_only :1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 25b5063388b2..f9e158261c42 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	drm_dma_handle_t *phys = obj->phys_handle;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	char *vaddr = obj->phys_handle->vaddr;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i;
 
-	if (!phys)
-		return;
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		page_cache_release(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(obj->base.dev);
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
 
-	if (obj->madv == I915_MADV_WILLNEED) {
+	sg_dma_address(sg) = obj->phys_handle->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->pages = st;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	if (obj->madv == I915_MADV_DONTNEED)
+		obj->dirty = 0;
+
+	if (obj->dirty) {
 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-		char *vaddr = phys->vaddr;
+		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page = shmem_read_mapping_page(mapping, i);
-			if (!IS_ERR(page)) {
-				char *dst = kmap_atomic(page);
-				memcpy(dst, vaddr, PAGE_SIZE);
-				drm_clflush_virt_range(dst, PAGE_SIZE);
-				kunmap_atomic(dst);
-
-				set_page_dirty(page);
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-				page_cache_release(page);
-			}
+			page_cache_release(page);
 			vaddr += PAGE_SIZE;
 		}
-		i915_gem_chipset_flush(obj->base.dev);
+		obj->dirty = 0;
 	}
 
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-	drm_pci_free(obj->base.dev, phys);
-	obj->phys_handle = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	obj->has_dma_mapping = false;
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma, *next;
+	int ret;
+
+	drm_gem_object_reference(&obj->base);
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_put_pages(obj);
+	drm_gem_object_unreference(&obj->base);
+
+	return ret;
 }
 
 int
@@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
 {
 	drm_dma_handle_t *phys;
-	struct address_space *mapping;
-	char *vaddr;
-	int i;
+	int ret;
 
 	if (obj->phys_handle) {
 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (obj->base.filp == NULL)
 		return -EINVAL;
 
+	ret = drop_pages(obj);
+	if (ret)
+		return ret;
+
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 	if (!phys)
 		return -ENOMEM;
 
-	vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-#ifdef CONFIG_X86
-			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-			drm_pci_free(obj->base.dev, phys);
-			return PTR_ERR(page);
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		vaddr += PAGE_SIZE;
-	}
-
 	obj->phys_handle = phys;
-	return 0;
+	obj->ops = &i915_gem_phys_ops;
+
+	return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -313,6 +386,13 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = to_user_ptr(args->data_ptr);
 
+	/* We manually control the domain here and pretend that it
+	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
+	 */
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
+
 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
 		unsigned long unwritten;
 
@@ -327,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 			return -EFAULT;
 	}
 
+	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -1190,11 +1271,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_handle) {
-		ret = i915_gem_phys_pwrite(obj, args, file);
-		goto out;
-	}
-
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    (obj->base.filp == NULL ||
 	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
@@ -1205,8 +1281,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC)
-		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT || ret == -ENOSPC) {
+		if (obj->phys_handle)
+			ret = i915_gem_phys_pwrite(obj, args, file);
+		else
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -3646,7 +3726,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
-	if (obj->stolen)
+	if (obj->stolen || obj->phys_handle)
 		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
@@ -4579,8 +4659,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		}
 	}
 
-	i915_gem_object_detach_phys(obj);
-
 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
 	 * before progressing. */
 	if (obj->stolen)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b6498a43497e..50d905483d8b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
2.0.0



* Re: [PATCH] drm/i915: Make the physical object coherent with GTT
  2014-06-11 11:40 ` Ville Syrjälä
@ 2014-06-11 11:51   ` Chris Wilson
  2014-06-11 11:59   ` Chris Wilson
  1 sibling, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-06-11 11:51 UTC (permalink / raw)
  To: Ville Syrjälä; +Cc: intel-gfx

On Wed, Jun 11, 2014 at 02:40:00PM +0300, Ville Syrjälä wrote:
> On Wed, Jun 11, 2014 at 11:28:41AM +0100, Chris Wilson wrote:
> > Currently, objects for which the hardware needs a contiguous physical
> > address are allocated shadow backing storage to satisfy the constraint.
> > This shadow buffer is not wired into the normal obj->pages, and so the
> > physical object is incoherent with accesses via the GPU, GTT and CPU. By
> > setting up the appropriate scatter-gather table, we can allow userspace
> > to access the physical object either via a GTT mmapping of, or by
> > rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> > backing storage coherent with the contiguous shadow is not yet possible.
> > Fortuitously, CPU mmaps of objects requiring physical addresses are not
> > expected to be coherent anyway.
> > 
> > This makes the physical constraint of the GEM object transparent to
> > userspace, allowing it to efficiently render into or update such objects
> > via the GTT and GPU.
> > 
> > v2: Fix leak of pci handle spotted by Ville
> > v3: Remove the now duplicate call to detach_phys_object during free.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Looks good. pread and CPU mmap would get garbage, but that's nothing
> new, and the commit message says as much.
> 
> phys pread should be rather easy to add (not much different from
> i915_gem_phys_pwrite()), but I guess no one has needed it since
> it's not there.

For completeness, we should do it. But as you say, no one has yet
requested the bits back from the phys object, and now, if they really
want them back, they can at least use a GTT mmap.
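
A phys pread along those lines might look something like this minimal,
untested sketch (not part of the patch), mirroring i915_gem_phys_pwrite()
and including the read-side wait_rendering raised below:

static int
i915_gem_phys_pread(struct drm_i915_gem_object *obj,
		    struct drm_i915_gem_pread *args,
		    struct drm_file *file)
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
	char __user *user_data = to_user_ptr(args->data_ptr);
	int ret;

	/* Read-only wait: outstanding writes must complete first. */
	ret = i915_gem_object_wait_rendering(obj, true);
	if (ret)
		return ret;

	/* The shadow is a WC mapping, so reads are already uncached. */
	if (copy_to_user(user_data, vaddr, args->size))
		return -EFAULT;

	return 0;
}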

> Oh, should there be an i915_gem_object_wait_rendering() in phys pwrite?

Yes.

> But in any case this patch is:
> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

I'll add the wait.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre


* Re: [PATCH] drm/i915: Make the physical object coherent with GTT
  2014-06-11 10:28 Chris Wilson
@ 2014-06-11 11:40 ` Ville Syrjälä
  2014-06-11 11:51   ` Chris Wilson
  2014-06-11 11:59   ` Chris Wilson
  0 siblings, 2 replies; 8+ messages in thread
From: Ville Syrjälä @ 2014-06-11 11:40 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, Jun 11, 2014 at 11:28:41AM +0100, Chris Wilson wrote:
> Currently, objects for which the hardware needs a contiguous physical
> address are allocated shadow backing storage to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages, and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of, or by
> rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This makes the physical constraint of the GEM object transparent to
> userspace, allowing it to efficiently render into or update such objects
> via the GTT and GPU.
> 
> v2: Fix leak of pci handle spotted by Ville
> v3: Remove the now duplicate call to detach_phys_object during free.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Looks good. pread and CPU mmap would get garbage, but that's nothing
new, and the commit message says as much.

phys pread should be rather easy to add (not much different from
i915_gem_phys_pwrite()), but I guess no one has needed it since
it's not there.

Oh, should there be an i915_gem_object_wait_rendering() in phys pwrite?

But in any case this patch is:
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>  drivers/gpu/drm/i915/i915_gem.c | 199 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  4 files changed, 142 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 59e8ffe230a7..b1f072039865 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_CMD_PARSER_VERSION:
>  		value = i915_cmd_parser_get_version();
>  		break;
> +	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +		value = 1;
> +		break;
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0a9d1b85d529..c80ddcf8aa6f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1715,10 +1715,10 @@ struct drm_i915_gem_object {
>  	unsigned long user_pin_count;
>  	struct drm_file *pin_filp;
>  
> -	/** for phy allocated objects */
> -	drm_dma_handle_t *phys_handle;
> -
>  	union {
> +		/** for phy allocated objects */
> +		drm_dma_handle_t *phys_handle;
> +
>  		struct i915_gem_userptr {
>  			uintptr_t ptr;
>  			unsigned read_only :1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 25b5063388b2..6e8535ba72d3 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -	drm_dma_handle_t *phys = obj->phys_handle;
> +	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +	char *vaddr = obj->phys_handle->vaddr;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int i;
>  
> -	if (!phys)
> -		return;
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		page_cache_release(page);
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	i915_gem_chipset_flush(obj->base.dev);
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
>  
> -	if (obj->madv == I915_MADV_WILLNEED) {
> +	sg = st->sgl;
> +	sg->offset = 0;
> +	sg->length = obj->base.size;
> +
> +	sg_dma_address(sg) = obj->phys_handle->busaddr;
> +	sg_dma_len(sg) = obj->base.size;
> +
> +	obj->pages = st;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		WARN_ON(ret != -EIO);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
> +	if (obj->madv == I915_MADV_DONTNEED)
> +		obj->dirty = 0;
> +
> +	if (obj->dirty) {
>  		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -		char *vaddr = phys->vaddr;
> +		char *vaddr = obj->phys_handle->vaddr;
>  		int i;
>  
>  		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -			struct page *page = shmem_read_mapping_page(mapping, i);
> -			if (!IS_ERR(page)) {
> -				char *dst = kmap_atomic(page);
> -				memcpy(dst, vaddr, PAGE_SIZE);
> -				drm_clflush_virt_range(dst, PAGE_SIZE);
> -				kunmap_atomic(dst);
> -
> -				set_page_dirty(page);
> +			struct page *page;
> +			char *dst;
> +
> +			page = shmem_read_mapping_page(mapping, i);
> +			if (IS_ERR(page))
> +				continue;
> +
> +			dst = kmap_atomic(page);
> +			drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +			memcpy(dst, vaddr, PAGE_SIZE);
> +			kunmap_atomic(dst);
> +
> +			set_page_dirty(page);
> +			if (obj->madv == I915_MADV_WILLNEED)
>  				mark_page_accessed(page);
> -				page_cache_release(page);
> -			}
> +			page_cache_release(page);
>  			vaddr += PAGE_SIZE;
>  		}
> -		i915_gem_chipset_flush(obj->base.dev);
> +		obj->dirty = 0;
>  	}
>  
> -#ifdef CONFIG_X86
> -	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	drm_pci_free(obj->base.dev, phys);
> -	obj->phys_handle = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	obj->has_dma_mapping = false;
> +}
> +
> +static void
> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
> +{
> +	drm_pci_free(obj->base.dev, obj->phys_handle);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +	.get_pages = i915_gem_object_get_pages_phys,
> +	.put_pages = i915_gem_object_put_pages_phys,
> +	.release = i915_gem_object_release_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma, *next;
> +	int ret;
> +
> +	drm_gem_object_reference(&obj->base);
> +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_put_pages(obj);
> +	drm_gem_object_unreference(&obj->base);
> +
> +	return ret;
>  }
>  
>  int
> @@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
>  {
>  	drm_dma_handle_t *phys;
> -	struct address_space *mapping;
> -	char *vaddr;
> -	int i;
> +	int ret;
>  
>  	if (obj->phys_handle) {
>  		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  	if (obj->base.filp == NULL)
>  		return -EINVAL;
>  
> +	ret = drop_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	/* create a new object */
>  	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>  	if (!phys)
>  		return -ENOMEM;
>  
> -	vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -		struct page *page;
> -		char *src;
> -
> -		page = shmem_read_mapping_page(mapping, i);
> -		if (IS_ERR(page)) {
> -#ifdef CONFIG_X86
> -			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -			drm_pci_free(obj->base.dev, phys);
> -			return PTR_ERR(page);
> -		}
> -
> -		src = kmap_atomic(page);
> -		memcpy(vaddr, src, PAGE_SIZE);
> -		kunmap_atomic(src);
> -
> -		mark_page_accessed(page);
> -		page_cache_release(page);
> -
> -		vaddr += PAGE_SIZE;
> -	}
> -
>  	obj->phys_handle = phys;
> -	return 0;
> +	obj->ops = &i915_gem_phys_ops;
> +
> +	return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -327,6 +400,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  			return -EFAULT;
>  	}
>  
> +	drm_clflush_virt_range(vaddr, args->size);
>  	i915_gem_chipset_flush(dev);
>  	return 0;
>  }
> @@ -1190,11 +1264,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 * pread/pwrite currently are reading and writing from the CPU
>  	 * perspective, requiring manual detiling by the client.
>  	 */
> -	if (obj->phys_handle) {
> -		ret = i915_gem_phys_pwrite(obj, args, file);
> -		goto out;
> -	}
> -
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    (obj->base.filp == NULL ||
>  	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> @@ -1205,8 +1274,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		 * textures). Fallback to the shmem path in that case. */
>  	}
>  
> -	if (ret == -EFAULT || ret == -ENOSPC)
> -		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	if (ret == -EFAULT || ret == -ENOSPC) {
> +		if (obj->phys_handle)
> +			ret = i915_gem_phys_pwrite(obj, args, file);
> +		else
> +			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	}
>  
>  out:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3646,7 +3719,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
> -	if (obj->stolen)
> +	if (obj->stolen || obj->phys_handle)
>  		return false;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
> @@ -4579,8 +4652,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  		}
>  	}
>  
> -	i915_gem_object_detach_phys(obj);
> -
>  	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>  	 * before progressing. */
>  	if (obj->stolen)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index b6498a43497e..50d905483d8b 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 2.0.0

-- 
Ville Syrjälä
Intel OTC


* [PATCH] drm/i915: Make the physical object coherent with GTT
@ 2014-06-11 10:28 Chris Wilson
  2014-06-11 11:40 ` Ville Syrjälä
  0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2014-06-11 10:28 UTC (permalink / raw)
  To: intel-gfx

Currently, objects for which the hardware needs a contiguous physical
address are allocated a shadow backing storage to satisfy the constraint.
This shadow buffer is not wired into the normal obj->pages and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object either via a GTT mmap of the GEM bo or by
rendering into it. However, keeping the CPU mmap of the shmemfs backing
storage coherent with the contiguous shadow is not yet possible.
Fortuitously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This makes the physical constraint of the GEM object transparent to
userspace, allowing it to efficiently render into or update such
objects via the GTT and GPU.
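
Userspace can detect the new behaviour through the getparam added
below. As a rough sketch (not part of this patch; the helper name is
illustrative, while the ioctl and parameter are the ones the patch
touches), a libdrm client could probe it like so:

#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Returns 1 if phys objects are coherent with GTT/GPU access,
 * 0 if not, and -1 on older kernels that reject the parameter.
 */
static int has_coherent_phys_gtt(int fd)
{
	drm_i915_getparam_t gp;
	int value = -1;

	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT;
	gp.value = &value;

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -1;

	return value;
}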

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |   3 +
 drivers/gpu/drm/i915/i915_drv.h |   6 +-
 drivers/gpu/drm/i915/i915_gem.c | 199 +++++++++++++++++++++++++++-------------
 include/uapi/drm/i915_drm.h     |   1 +
 4 files changed, 142 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 59e8ffe230a7..b1f072039865 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version();
 		break;
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0a9d1b85d529..c80ddcf8aa6f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1715,10 +1715,10 @@ struct drm_i915_gem_object {
 	unsigned long user_pin_count;
 	struct drm_file *pin_filp;
 
-	/** for phy allocated objects */
-	drm_dma_handle_t *phys_handle;
-
 	union {
+		/** for phy allocated objects */
+		drm_dma_handle_t *phys_handle;
+
 		struct i915_gem_userptr {
 			uintptr_t ptr;
 			unsigned read_only :1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 25b5063388b2..6e8535ba72d3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	drm_dma_handle_t *phys = obj->phys_handle;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	char *vaddr = obj->phys_handle->vaddr;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i;
 
-	if (!phys)
-		return;
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		page_cache_release(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(obj->base.dev);
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
 
-	if (obj->madv == I915_MADV_WILLNEED) {
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = obj->phys_handle->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->pages = st;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	if (obj->madv == I915_MADV_DONTNEED)
+		obj->dirty = 0;
+
+	if (obj->dirty) {
 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-		char *vaddr = phys->vaddr;
+		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page = shmem_read_mapping_page(mapping, i);
-			if (!IS_ERR(page)) {
-				char *dst = kmap_atomic(page);
-				memcpy(dst, vaddr, PAGE_SIZE);
-				drm_clflush_virt_range(dst, PAGE_SIZE);
-				kunmap_atomic(dst);
-
-				set_page_dirty(page);
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-				page_cache_release(page);
-			}
+			page_cache_release(page);
 			vaddr += PAGE_SIZE;
 		}
-		i915_gem_chipset_flush(obj->base.dev);
+		obj->dirty = 0;
 	}
 
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-	drm_pci_free(obj->base.dev, phys);
-	obj->phys_handle = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	obj->has_dma_mapping = false;
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma, *next;
+	int ret;
+
+	drm_gem_object_reference(&obj->base);
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_put_pages(obj);
+	drm_gem_object_unreference(&obj->base);
+
+	return ret;
 }
 
 int
@@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
 {
 	drm_dma_handle_t *phys;
-	struct address_space *mapping;
-	char *vaddr;
-	int i;
+	int ret;
 
 	if (obj->phys_handle) {
 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (obj->base.filp == NULL)
 		return -EINVAL;
 
+	ret = drop_pages(obj);
+	if (ret)
+		return ret;
+
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 	if (!phys)
 		return -ENOMEM;
 
-	vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-#ifdef CONFIG_X86
-			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-			drm_pci_free(obj->base.dev, phys);
-			return PTR_ERR(page);
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		vaddr += PAGE_SIZE;
-	}
-
 	obj->phys_handle = phys;
-	return 0;
+	obj->ops = &i915_gem_phys_ops;
+
+	return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -327,6 +400,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 			return -EFAULT;
 	}
 
+	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -1190,11 +1264,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_handle) {
-		ret = i915_gem_phys_pwrite(obj, args, file);
-		goto out;
-	}
-
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    (obj->base.filp == NULL ||
 	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
@@ -1205,8 +1274,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC)
-		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT || ret == -ENOSPC) {
+		if (obj->phys_handle)
+			ret = i915_gem_phys_pwrite(obj, args, file);
+		else
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -3646,7 +3719,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
-	if (obj->stolen)
+	if (obj->stolen || obj->phys_handle)
 		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
@@ -4579,8 +4652,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		}
 	}
 
-	i915_gem_object_detach_phys(obj);
-
 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
 	 * before progressing. */
 	if (obj->stolen)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b6498a43497e..50d905483d8b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
2.0.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [PATCH] drm/i915: Make the physical object coherent with GTT
@ 2014-06-05 15:42 Chris Wilson
  0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2014-06-05 15:42 UTC (permalink / raw)
  To: intel-gfx

Currently, objects for which the hardware needs a contiguous physical
address are allocated a shadow backing storage to satisfy the constraint.
This shadow buffer is not wired into the normal obj->pages and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object either via a GTT mmap of the GEM bo or by
rendering into it. However, keeping the CPU mmap of the shmemfs backing
storage coherent with the contiguous shadow is not yet possible.
Fortuitously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This makes the physical constraint of the GEM object transparent to
userspace, allowing it to efficiently render into or update such
objects via the GTT and GPU.
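
For illustration (not part of this patch; the helper name is made up,
the ioctl is the standard GTT mmap one), the access pattern this
enables is the ordinary GTT mmap of the bo:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Map a GEM bo through the GTT aperture for direct CPU access. */
static void *gtt_map_bo(int fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg;
	void *ptr;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	/* Fetch the fake mmap offset the kernel assigns to this bo. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return NULL;

	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
		   MAP_SHARED, fd, arg.offset);
	return ptr == MAP_FAILED ? NULL : ptr;
}

With this patch, writes through such a mapping land in the contiguous
shadow that the hardware reads from, instead of going stale.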

v2: Fix leak of pci handle spotted by Ville

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |   3 +
 drivers/gpu/drm/i915/i915_drv.h |   6 +-
 drivers/gpu/drm/i915/i915_gem.c | 197 +++++++++++++++++++++++++++-------------
 include/uapi/drm/i915_drm.h     |   1 +
 4 files changed, 142 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4a0b4d22faff..4811c635ab12 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version();
 		break;
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8da2de4c997b..b39f1782ded7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1715,10 +1715,10 @@ struct drm_i915_gem_object {
 	unsigned long user_pin_count;
 	struct drm_file *pin_filp;
 
-	/** for phy allocated objects */
-	drm_dma_handle_t *phys_handle;
-
 	union {
+		/** for phy allocated objects */
+		drm_dma_handle_t *phys_handle;
+
 		struct i915_gem_userptr {
 			uintptr_t ptr;
 			unsigned read_only :1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 71d77c8fc77b..5f44524369ff 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -210,40 +210,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	drm_dma_handle_t *phys = obj->phys_handle;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	char *vaddr = obj->phys_handle->vaddr;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i;
 
-	if (!phys)
-		return;
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		page_cache_release(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(obj->base.dev);
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
 
-	if (obj->madv == I915_MADV_WILLNEED) {
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = obj->phys_handle->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->pages = st;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	if (obj->madv == I915_MADV_DONTNEED)
+		obj->dirty = 0;
+
+	if (obj->dirty) {
 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-		char *vaddr = phys->vaddr;
+		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page = shmem_read_mapping_page(mapping, i);
-			if (!IS_ERR(page)) {
-				char *dst = kmap_atomic(page);
-				memcpy(dst, vaddr, PAGE_SIZE);
-				drm_clflush_virt_range(dst, PAGE_SIZE);
-				kunmap_atomic(dst);
-
-				set_page_dirty(page);
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-				page_cache_release(page);
-			}
+			page_cache_release(page);
 			vaddr += PAGE_SIZE;
 		}
-		i915_gem_chipset_flush(obj->base.dev);
+		obj->dirty = 0;
 	}
 
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-	drm_pci_free(obj->base.dev, phys);
-	obj->phys_handle = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	obj->has_dma_mapping = false;
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma, *next;
+	int ret;
+
+	drm_gem_object_reference(&obj->base);
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_put_pages(obj);
+	drm_gem_object_unreference(&obj->base);
+
+	return ret;
 }
 
 int
@@ -251,9 +348,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
 {
 	drm_dma_handle_t *phys;
-	struct address_space *mapping;
-	char *vaddr;
-	int i;
+	int ret;
 
 	if (obj->phys_handle) {
 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -268,41 +363,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (obj->base.filp == NULL)
 		return -EINVAL;
 
+	ret = drop_pages(obj);
+	if (ret)
+		return ret;
+
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 	if (!phys)
 		return -ENOMEM;
 
-	vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-#ifdef CONFIG_X86
-			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-			drm_pci_free(obj->base.dev, phys);
-			return PTR_ERR(page);
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		vaddr += PAGE_SIZE;
-	}
-
 	obj->phys_handle = phys;
-	return 0;
+	obj->ops = &i915_gem_phys_ops;
+
+	return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -328,6 +401,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 			return -EFAULT;
 	}
 
+	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -1191,11 +1265,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_handle) {
-		ret = i915_gem_phys_pwrite(obj, args, file);
-		goto out;
-	}
-
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    (obj->base.filp == NULL ||
 	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
@@ -1206,8 +1275,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC)
-		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT || ret == -ENOSPC) {
+		if (obj->phys_handle)
+			ret = i915_gem_phys_pwrite(obj, args, file);
+		else
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -3651,7 +3724,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
-	if (obj->stolen)
+	if (obj->stolen || obj->phys_handle)
 		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b6498a43497e..50d905483d8b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
2.0.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


end of thread, other threads:[~2014-08-08  9:24 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-10 20:53 [PATCH] drm/i915: Make the physical object coherent with GTT Chris Wilson
2014-08-08  9:25 ` Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2014-06-11 10:28 Chris Wilson
2014-06-11 11:40 ` Ville Syrjälä
2014-06-11 11:51   ` Chris Wilson
2014-06-11 11:59   ` Chris Wilson
2014-07-10 20:59     ` Daniel Vetter
2014-06-05 15:42 Chris Wilson
