* [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles
@ 2014-04-18  7:27 Chris Wilson
  2014-04-18  7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
                   ` (2 more replies)
From: Chris Wilson @ 2014-04-18  7:27 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

A single object may be referenced by multiple registers, fundamentally
breaking the static allotment of ids in the current design. When the
object is used a second time, the physical address of the first
assignment is relinquished and a second one granted. However, the
hardware is still reading from (and possibly writing to) the old
physical address, which has now been returned to the system. Eventually
hilarity will ensue, but in the short term it just means that cursors
are broken when using more than one pipe.
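
(For illustration, a simplified sketch of the failure sequence under the
old fixed-id scheme, using the names of the functions removed below; the
real call site is intel_crtc_cursor_set():)

	/* pipe A: bind the bo to the CURSOR_0 slot */
	i915_gem_attach_phys_object(dev, obj, I915_GEM_PHYS_CURSOR_0, align);

	/* pipe B: reusing the same bo forces a detach/re-attach */
	i915_gem_attach_phys_object(dev, obj, I915_GEM_PHYS_CURSOR_1, align);
	/* obj->phys_obj now points only at the CURSOR_1 slot; pipe A is
	 * left scanning out from the CURSOR_0 address it was handed,
	 * which this object no longer tracks */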

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77351
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_dma.c      |   1 -
 drivers/gpu/drm/i915/i915_drv.h      |  24 +--
 drivers/gpu/drm/i915/i915_gem.c      | 315 ++++++++++++++---------------------
 drivers/gpu/drm/i915/intel_display.c |  11 +-
 drivers/gpu/drm/i915/intel_overlay.c |  12 +-
 5 files changed, 131 insertions(+), 232 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d67ca8051e07..12571aac7516 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1854,7 +1854,6 @@ int i915_driver_unload(struct drm_device *dev)
 		flush_workqueue(dev_priv->wq);
 
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_free_all_phys_object(dev);
 		i915_gem_cleanup_ringbuffer(dev);
 		i915_gem_context_fini(dev);
 		WARN_ON(dev_priv->mm.aliasing_ppgtt);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0e85bf2c3326..a2375a0ef27c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -246,18 +246,6 @@ struct intel_ddi_plls {
 #define WATCH_LISTS	0
 #define WATCH_GTT	0
 
-#define I915_GEM_PHYS_CURSOR_0 1
-#define I915_GEM_PHYS_CURSOR_1 2
-#define I915_GEM_PHYS_OVERLAY_REGS 3
-#define I915_MAX_PHYS_OBJECT (I915_GEM_PHYS_OVERLAY_REGS)
-
-struct drm_i915_gem_phys_object {
-	int id;
-	struct page **page_list;
-	drm_dma_handle_t *handle;
-	struct drm_i915_gem_object *cur_obj;
-};
-
 struct opregion_header;
 struct opregion_acpi;
 struct opregion_swsci;
@@ -1036,9 +1024,6 @@ struct i915_gem_mm {
 	/** Bit 6 swizzling required for Y tiling */
 	uint32_t bit_6_swizzle_y;
 
-	/* storage for physical objects */
-	struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
-
 	/* accounting, useful for userland debugging */
 	spinlock_t object_stat_lock;
 	size_t object_memory;
@@ -1647,7 +1632,7 @@ struct drm_i915_gem_object {
 	struct drm_file *pin_filp;
 
 	/** for phy allocated objects */
-	struct drm_i915_gem_phys_object *phys_obj;
+	drm_dma_handle_t *phys_handle;
 
 	union {
 		struct i915_gem_userptr {
@@ -2250,13 +2235,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
 				     struct intel_ring_buffer *pipelined);
 void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
-int i915_gem_attach_phys_object(struct drm_device *dev,
-				struct drm_i915_gem_object *obj,
-				int id,
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 				int align);
-void i915_gem_detach_phys_object(struct drm_device *dev,
-				 struct drm_i915_gem_object *obj);
-void i915_gem_free_all_phys_object(struct drm_device *dev);
 int i915_gem_open(struct drm_device *dev, struct drm_file *file);
 void i915_gem_release(struct drm_device *dev, struct drm_file *file);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 764467ebade5..e302a23031fe 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,11 +46,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 static void
 i915_gem_object_retire(struct drm_i915_gem_object *obj);
 
-static int i915_gem_phys_pwrite(struct drm_device *dev,
-				struct drm_i915_gem_object *obj,
-				struct drm_i915_gem_pwrite *args,
-				struct drm_file *file);
-
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
@@ -212,6 +207,124 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+{
+	drm_dma_handle_t *phys = obj->phys_handle;
+
+	if (!phys)
+		return;
+
+	if (obj->madv == I915_MADV_WILLNEED) {
+		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+		char *vaddr = phys->vaddr;
+		int i;
+
+		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+			struct page *page = shmem_read_mapping_page(mapping, i);
+			if (!IS_ERR(page)) {
+				char *dst = kmap_atomic(page);
+				memcpy(dst, vaddr, PAGE_SIZE);
+				kunmap_atomic(dst);
+
+				drm_clflush_pages(&page, 1);
+
+				set_page_dirty(page);
+				mark_page_accessed(page);
+				page_cache_release(page);
+			}
+			vaddr += PAGE_SIZE;
+		}
+		i915_gem_chipset_flush(obj->base.dev);
+	}
+
+#ifdef CONFIG_X86
+	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
+#endif
+	drm_pci_free(obj->base.dev, phys);
+	obj->phys_handle = NULL;
+}
+
+int
+i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
+			    int align)
+{
+	drm_dma_handle_t *phys;
+	struct address_space *mapping;
+	char *vaddr;
+	int i;
+
+	if (obj->phys_handle) {
+		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
+			return -EBUSY;
+
+		return 0;
+	}
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		return -EFAULT;
+
+	if (obj->base.filp == NULL)
+		return -EINVAL;
+
+	/* create a new object */
+	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
+	if (!phys)
+		return -ENOMEM;
+
+	vaddr = phys->vaddr;
+#ifdef CONFIG_X86
+	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
+#endif
+	mapping = file_inode(obj->base.filp)->i_mapping;
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+
+		vaddr += PAGE_SIZE;
+	}
+
+	obj->phys_handle = phys;
+	return 0;
+}
+
+static int
+i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
+		     struct drm_i915_gem_pwrite *args,
+		     struct drm_file *file_priv)
+{
+	struct drm_device *dev = obj->base.dev;
+	void *vaddr = obj->phys_handle->vaddr + args->offset;
+	char __user *user_data = to_user_ptr(args->data_ptr);
+
+	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
+		unsigned long unwritten;
+
+		/* The physical object once assigned is fixed for the lifetime
+		 * of the obj, so we can safely drop the lock and continue
+		 * to access vaddr.
+		 */
+		mutex_unlock(&dev->struct_mutex);
+		unwritten = copy_from_user(vaddr, user_data, args->size);
+		mutex_lock(&dev->struct_mutex);
+		if (unwritten)
+			return -EFAULT;
+	}
+
+	i915_gem_chipset_flush(dev);
+	return 0;
+}
+
 void *i915_gem_object_alloc(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1071,8 +1184,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_obj) {
-		ret = i915_gem_phys_pwrite(dev, obj, args, file);
+	if (obj->phys_handle) {
+		ret = i915_gem_phys_pwrite(obj, args, file);
 		goto out;
 	}
 
@@ -4376,9 +4489,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 
 	trace_i915_gem_object_destroy(obj);
 
-	if (obj->phys_obj)
-		i915_gem_detach_phys_object(dev, obj);
-
 	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
 		int ret;
 
@@ -4408,6 +4518,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 	i915_gem_object_release_stolen(obj);
+	i915_gem_object_detach_phys(obj);
 
 	BUG_ON(obj->pages);
 
@@ -4875,190 +4986,6 @@ i915_gem_load(struct drm_device *dev)
 	register_shrinker(&dev_priv->mm.shrinker);
 }
 
-/*
- * Create a physically contiguous memory object for this object
- * e.g. for cursor + overlay regs
- */
-static int i915_gem_init_phys_object(struct drm_device *dev,
-				     int id, int size, int align)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_phys_object *phys_obj;
-	int ret;
-
-	if (dev_priv->mm.phys_objs[id - 1] || !size)
-		return 0;
-
-	phys_obj = kzalloc(sizeof(*phys_obj), GFP_KERNEL);
-	if (!phys_obj)
-		return -ENOMEM;
-
-	phys_obj->id = id;
-
-	phys_obj->handle = drm_pci_alloc(dev, size, align);
-	if (!phys_obj->handle) {
-		ret = -ENOMEM;
-		goto kfree_obj;
-	}
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
-#endif
-
-	dev_priv->mm.phys_objs[id - 1] = phys_obj;
-
-	return 0;
-kfree_obj:
-	kfree(phys_obj);
-	return ret;
-}
-
-static void i915_gem_free_phys_object(struct drm_device *dev, int id)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_phys_object *phys_obj;
-
-	if (!dev_priv->mm.phys_objs[id - 1])
-		return;
-
-	phys_obj = dev_priv->mm.phys_objs[id - 1];
-	if (phys_obj->cur_obj) {
-		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
-	}
-
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
-#endif
-	drm_pci_free(dev, phys_obj->handle);
-	kfree(phys_obj);
-	dev_priv->mm.phys_objs[id - 1] = NULL;
-}
-
-void i915_gem_free_all_phys_object(struct drm_device *dev)
-{
-	int i;
-
-	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
-		i915_gem_free_phys_object(dev, i);
-}
-
-void i915_gem_detach_phys_object(struct drm_device *dev,
-				 struct drm_i915_gem_object *obj)
-{
-	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-	char *vaddr;
-	int i;
-	int page_count;
-
-	if (!obj->phys_obj)
-		return;
-	vaddr = obj->phys_obj->handle->vaddr;
-
-	page_count = obj->base.size / PAGE_SIZE;
-	for (i = 0; i < page_count; i++) {
-		struct page *page = shmem_read_mapping_page(mapping, i);
-		if (!IS_ERR(page)) {
-			char *dst = kmap_atomic(page);
-			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
-			kunmap_atomic(dst);
-
-			drm_clflush_pages(&page, 1);
-
-			set_page_dirty(page);
-			mark_page_accessed(page);
-			page_cache_release(page);
-		}
-	}
-	i915_gem_chipset_flush(dev);
-
-	obj->phys_obj->cur_obj = NULL;
-	obj->phys_obj = NULL;
-}
-
-int
-i915_gem_attach_phys_object(struct drm_device *dev,
-			    struct drm_i915_gem_object *obj,
-			    int id,
-			    int align)
-{
-	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = 0;
-	int page_count;
-	int i;
-
-	if (id > I915_MAX_PHYS_OBJECT)
-		return -EINVAL;
-
-	if (obj->phys_obj) {
-		if (obj->phys_obj->id == id)
-			return 0;
-		i915_gem_detach_phys_object(dev, obj);
-	}
-
-	/* create a new object */
-	if (!dev_priv->mm.phys_objs[id - 1]) {
-		ret = i915_gem_init_phys_object(dev, id,
-						obj->base.size, align);
-		if (ret) {
-			DRM_ERROR("failed to init phys object %d size: %zu\n",
-				  id, obj->base.size);
-			return ret;
-		}
-	}
-
-	/* bind to the object */
-	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
-	obj->phys_obj->cur_obj = obj;
-
-	page_count = obj->base.size / PAGE_SIZE;
-
-	for (i = 0; i < page_count; i++) {
-		struct page *page;
-		char *dst, *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page))
-			return PTR_ERR(page);
-
-		src = kmap_atomic(page);
-		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
-		memcpy(dst, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-	}
-
-	return 0;
-}
-
-static int
-i915_gem_phys_pwrite(struct drm_device *dev,
-		     struct drm_i915_gem_object *obj,
-		     struct drm_i915_gem_pwrite *args,
-		     struct drm_file *file_priv)
-{
-	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
-	char __user *user_data = to_user_ptr(args->data_ptr);
-
-	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
-		unsigned long unwritten;
-
-		/* The physical object once assigned is fixed for the lifetime
-		 * of the obj, so we can safely drop the lock and continue
-		 * to access vaddr.
-		 */
-		mutex_unlock(&dev->struct_mutex);
-		unwritten = copy_from_user(vaddr, user_data, args->size);
-		mutex_lock(&dev->struct_mutex);
-		if (unwritten)
-			return -EFAULT;
-	}
-
-	i915_gem_chipset_flush(dev);
-	return 0;
-}
-
 void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3d017f5b656a..61735fde3f12 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7777,14 +7777,12 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 		addr = i915_gem_obj_ggtt_offset(obj);
 	} else {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
-		ret = i915_gem_attach_phys_object(dev, obj,
-						  (intel_crtc->pipe == 0) ? I915_GEM_PHYS_CURSOR_0 : I915_GEM_PHYS_CURSOR_1,
-						  align);
+		ret = i915_gem_object_attach_phys(obj, align);
 		if (ret) {
 			DRM_DEBUG_KMS("failed to attach phys object\n");
 			goto fail_locked;
 		}
-		addr = obj->phys_obj->handle->busaddr;
+		addr = obj->phys_handle->busaddr;
 	}
 
 	if (IS_GEN2(dev))
@@ -7792,10 +7790,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 
  finish:
 	if (intel_crtc->cursor_bo) {
-		if (INTEL_INFO(dev)->cursor_needs_physical) {
-			if (intel_crtc->cursor_bo != obj)
-				i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo);
-		} else
+		if (!INTEL_INFO(dev)->cursor_needs_physical)
 			i915_gem_object_unpin_from_display_plane(intel_crtc->cursor_bo);
 		drm_gem_object_unreference(&intel_crtc->cursor_bo->base);
 	}
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 4232230a78bd..5423eb0c1335 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -194,7 +194,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay)
 	struct overlay_registers __iomem *regs;
 
 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
-		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj->handle->vaddr;
+		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr;
 	else
 		regs = io_mapping_map_wc(dev_priv->gtt.mappable,
 					 i915_gem_obj_ggtt_offset(overlay->reg_bo));
@@ -1347,14 +1347,12 @@ void intel_setup_overlay(struct drm_device *dev)
 	overlay->reg_bo = reg_bo;
 
 	if (OVERLAY_NEEDS_PHYSICAL(dev)) {
-		ret = i915_gem_attach_phys_object(dev, reg_bo,
-						  I915_GEM_PHYS_OVERLAY_REGS,
-						  PAGE_SIZE);
+		ret = i915_gem_object_attach_phys(reg_bo, PAGE_SIZE);
 		if (ret) {
 			DRM_ERROR("failed to attach phys overlay regs\n");
 			goto out_free_bo;
 		}
-		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
+		overlay->flip_addr = reg_bo->phys_handle->busaddr;
 	} else {
 		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
 		if (ret) {
@@ -1436,7 +1434,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay)
 		/* Cast to make sparse happy, but it's wc memory anyway, so
 		 * equivalent to the wc io mapping on X86. */
 		regs = (struct overlay_registers __iomem *)
-			overlay->reg_bo->phys_obj->handle->vaddr;
+			overlay->reg_bo->phys_handle->vaddr;
 	else
 		regs = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 						i915_gem_obj_ggtt_offset(overlay->reg_bo));
@@ -1470,7 +1468,7 @@ intel_overlay_capture_error_state(struct drm_device *dev)
 	error->dovsta = I915_READ(DOVSTA);
 	error->isr = I915_READ(ISR);
 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
-		error->base = (__force long)overlay->reg_bo->phys_obj->handle->vaddr;
+		error->base = (__force long)overlay->reg_bo->phys_handle->vaddr;
 	else
 		error->base = i915_gem_obj_ggtt_offset(overlay->reg_bo);
 
-- 
1.9.2

* [PATCH 2/2] drm/i915: Make the physical object coherent with GTT
  2014-04-18  7:27 [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
@ 2014-04-18  7:27 ` Chris Wilson
  2014-04-22 20:03   ` Daniel Vetter
  2014-05-02 17:17   ` Ville Syrjälä
  2014-05-01  8:32 ` [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
  2014-05-02 17:12 ` Ville Syrjälä
From: Chris Wilson @ 2014-04-18  7:27 UTC (permalink / raw)
  To: intel-gfx

Currently objects for which the hardware needs a contiguous physical
address are allocated a shadow backing store to satisfy the constraint.
This shadow buffer is not wired into the normal obj->pages and so the
physical object is incoherent with accesses via the GPU, GTT and CPU. By
setting up the appropriate scatter-gather table, we can allow userspace
to access the physical object either via a GTT mmapping of, or by
rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
backing storage coherent with the contiguous shadow is not yet possible.
Fortuitously, CPU mmaps of objects requiring physical addresses are not
expected to be coherent anyway.

This allows the physical constraint of the GEM object to be transparent
to userspace, which can then efficiently render into or update the
object via the GTT and GPU.
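
(A minimal userspace sketch, not part of the patch, of how a client can
detect the new coherent behaviour via the getparam added below; "fd" is
assumed to be an open i915 drm fd:)

	int coherent = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_COHERENT_PHYS_GTT,
		.value = &coherent,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && coherent) {
		/* pwrite, GTT mmaps and GPU rendering of physical objects
		 * (cursors on 8xx, overlay regs) are mutually coherent */
	}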

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c |   3 +
 drivers/gpu/drm/i915/i915_gem.c | 182 +++++++++++++++++++++++++++-------------
 include/uapi/drm/i915_drm.h     |   1 +
 3 files changed, 129 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 12571aac7516..dd43fdc736ac 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1022,6 +1022,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version();
 		break;
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e302a23031fe..3eb5c07e1018 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -207,41 +207,129 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	drm_dma_handle_t *phys = obj->phys_handle;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	char *vaddr = obj->phys_handle->vaddr;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i;
 
-	if (!phys)
-		return;
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		page_cache_release(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	drm_clflush_virt_range(obj->phys_handle->vaddr, obj->base.size);
+	i915_gem_chipset_flush(obj->base.dev);
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
+
+	sg_dma_address(sg) = obj->phys_handle->busaddr;
+	sg_dma_len(sg) = obj->base.size;
 
-	if (obj->madv == I915_MADV_WILLNEED) {
+	obj->pages = st;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	if (obj->madv == I915_MADV_DONTNEED)
+		obj->dirty = 0;
+
+	if (obj->dirty) {
 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-		char *vaddr = phys->vaddr;
+		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page = shmem_read_mapping_page(mapping, i);
-			if (!IS_ERR(page)) {
-				char *dst = kmap_atomic(page);
-				memcpy(dst, vaddr, PAGE_SIZE);
-				kunmap_atomic(dst);
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
 
-				drm_clflush_pages(&page, 1);
+			dst = kmap_atomic(page);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
 
-				set_page_dirty(page);
+			set_page_dirty(page);
+			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-				page_cache_release(page);
-			}
+			page_cache_release(page);
 			vaddr += PAGE_SIZE;
 		}
-		i915_gem_chipset_flush(obj->base.dev);
+		obj->dirty = 0;
 	}
 
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-	drm_pci_free(obj->base.dev, phys);
-	obj->phys_handle = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	obj->has_dma_mapping = false;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma, *next;
+	int ret;
+
+	drm_gem_object_reference(&obj->base);
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_put_pages(obj);
+	drm_gem_object_unreference(&obj->base);
+
+	return ret;
 }
 
 int
@@ -249,9 +337,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
 {
 	drm_dma_handle_t *phys;
-	struct address_space *mapping;
-	char *vaddr;
-	int i;
+	int ret;
 
 	if (obj->phys_handle) {
 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -266,36 +352,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (obj->base.filp == NULL)
 		return -EINVAL;
 
+	ret = drop_pages(obj);
+	if (ret)
+		return ret;
+
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 	if (!phys)
 		return -ENOMEM;
 
-	vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page))
-			return PTR_ERR(page);
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		vaddr += PAGE_SIZE;
-	}
-
 	obj->phys_handle = phys;
-	return 0;
+	obj->ops = &i915_gem_phys_ops;
+
+	return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -321,6 +390,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 			return -EFAULT;
 	}
 
+	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -1184,11 +1254,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_handle) {
-		ret = i915_gem_phys_pwrite(obj, args, file);
-		goto out;
-	}
-
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    (obj->base.filp == NULL ||
 	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
@@ -1199,8 +1264,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC)
-		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT || ret == -ENOSPC) {
+		if (obj->phys_handle)
+			ret = i915_gem_phys_pwrite(obj, args, file);
+		else
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -3658,7 +3727,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
-	if (obj->stolen)
+	if (obj->stolen || obj->phys_handle)
 		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
@@ -4518,7 +4587,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	i915_gem_object_put_pages(obj);
 	i915_gem_object_free_mmap_offset(obj);
 	i915_gem_object_release_stolen(obj);
-	i915_gem_object_detach_phys(obj);
 
 	BUG_ON(obj->pages);
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index beee13a53fbb..cf1425c9f54e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
1.9.2

* Re: [PATCH 2/2] drm/i915: Make the physical object coherent with GTT
  2014-04-18  7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
@ 2014-04-22 20:03   ` Daniel Vetter
  2014-05-02 17:17   ` Ville Syrjälä
From: Daniel Vetter @ 2014-04-22 20:03 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Apr 18, 2014 at 08:27:04AM +0100, Chris Wilson wrote:
> Currently objects for which the hardware needs a contiguous physical
> address are allocated a shadow backing store to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of, or by
> rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This allows the physical constraint of the GEM object to be transparent
> to userspace, which can then efficiently render into or update the
> object via the GTT and GPU.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

You know the drill: I'd like to have a small igt for this which creates a
cursor bo, puts it to use, and then checks whether the pwrite -> gtt mmap
read path is coherent.
-Daniel
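
(A rough sketch of such a test using the raw ioctls; hypothetical, with
error handling elided. "fd" is assumed to be a drm master fd on a
platform with cursor_needs_physical, and "crtc_id" a live crtc:)

	#include <assert.h>
	#include <stdint.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <xf86drm.h>
	#include <xf86drmMode.h>
	#include <drm/i915_drm.h>

	static void check_cursor_phys_coherency(int fd, uint32_t crtc_id)
	{
		struct drm_i915_gem_create create = { .size = 64 * 64 * 4 };
		struct drm_i915_gem_mmap_gtt arg = { 0 };
		char buf[64 * 64 * 4], *ptr;

		drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);

		/* put the bo to use as a cursor so that the kernel
		 * attaches the contiguous physical backing store */
		drmModeSetCursor(fd, crtc_id, create.handle, 64, 64);

		/* write a pattern through pwrite... */
		memset(buf, 0xc5, sizeof(buf));
		struct drm_i915_gem_pwrite pwrite = {
			.handle = create.handle,
			.size = sizeof(buf),
			.data_ptr = (uintptr_t)buf,
		};
		drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);

		/* ...and read it back through a GTT mmap */
		arg.handle = create.handle;
		drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
		ptr = mmap(NULL, create.size, PROT_READ, MAP_SHARED,
			   fd, arg.offset);
		assert(ptr != MAP_FAILED);
		assert(memcmp(ptr, buf, sizeof(buf)) == 0);
		munmap(ptr, create.size);
	}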

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles
  2014-04-18  7:27 [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
  2014-04-18  7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
@ 2014-05-01  8:32 ` Chris Wilson
  2014-05-02 17:12 ` Ville Syrjälä
From: Chris Wilson @ 2014-05-01  8:32 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

On Fri, Apr 18, 2014 at 08:27:03AM +0100, Chris Wilson wrote:
> A single object may be referenced by multiple registers, fundamentally
> breaking the static allotment of ids in the current design. When the
> object is used a second time, the physical address of the first
> assignment is relinquished and a second one granted. However, the
> hardware is still reading from (and possibly writing to) the old
> physical address, which has now been returned to the system. Eventually
> hilarity will ensue, but in the short term it just means that cursors
> are broken when using more than one pipe.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77351
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: stable@vger.kernel.org
Ping  ^^^^^^
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles
  2014-04-18  7:27 [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
  2014-04-18  7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
  2014-05-01  8:32 ` [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
@ 2014-05-02 17:12 ` Ville Syrjälä
From: Ville Syrjälä @ 2014-05-02 17:12 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Fri, Apr 18, 2014 at 08:27:03AM +0100, Chris Wilson wrote:
> A single object may be referenced by multiple registers, fundamentally
> breaking the static allotment of ids in the current design. When the
> object is used a second time, the physical address of the first
> assignment is relinquished and a second one granted. However, the
> hardware is still reading from (and possibly writing to) the old
> physical address, which has now been returned to the system. Eventually
> hilarity will ensue, but in the short term it just means that cursors
> are broken when using more than one pipe.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77351
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: stable@vger.kernel.org
> ---
>  drivers/gpu/drm/i915/i915_dma.c      |   1 -
>  drivers/gpu/drm/i915/i915_drv.h      |  24 +--
>  drivers/gpu/drm/i915/i915_gem.c      | 315 ++++++++++++++---------------------
>  drivers/gpu/drm/i915/intel_display.c |  11 +-
>  drivers/gpu/drm/i915/intel_overlay.c |  12 +-
>  5 files changed, 131 insertions(+), 232 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index d67ca8051e07..12571aac7516 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1854,7 +1854,6 @@ int i915_driver_unload(struct drm_device *dev)
>  		flush_workqueue(dev_priv->wq);
>  
>  		mutex_lock(&dev->struct_mutex);
> -		i915_gem_free_all_phys_object(dev);
>  		i915_gem_cleanup_ringbuffer(dev);
>  		i915_gem_context_fini(dev);
>  		WARN_ON(dev_priv->mm.aliasing_ppgtt);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0e85bf2c3326..a2375a0ef27c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -246,18 +246,6 @@ struct intel_ddi_plls {
>  #define WATCH_LISTS	0
>  #define WATCH_GTT	0
>  
> -#define I915_GEM_PHYS_CURSOR_0 1
> -#define I915_GEM_PHYS_CURSOR_1 2
> -#define I915_GEM_PHYS_OVERLAY_REGS 3
> -#define I915_MAX_PHYS_OBJECT (I915_GEM_PHYS_OVERLAY_REGS)
> -
> -struct drm_i915_gem_phys_object {
> -	int id;
> -	struct page **page_list;
> -	drm_dma_handle_t *handle;
> -	struct drm_i915_gem_object *cur_obj;
> -};
> -
>  struct opregion_header;
>  struct opregion_acpi;
>  struct opregion_swsci;
> @@ -1036,9 +1024,6 @@ struct i915_gem_mm {
>  	/** Bit 6 swizzling required for Y tiling */
>  	uint32_t bit_6_swizzle_y;
>  
> -	/* storage for physical objects */
> -	struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
> -
>  	/* accounting, useful for userland debugging */
>  	spinlock_t object_stat_lock;
>  	size_t object_memory;
> @@ -1647,7 +1632,7 @@ struct drm_i915_gem_object {
>  	struct drm_file *pin_filp;
>  
>  	/** for phy allocated objects */
> -	struct drm_i915_gem_phys_object *phys_obj;
> +	drm_dma_handle_t *phys_handle;
>  
>  	union {
>  		struct i915_gem_userptr {
> @@ -2250,13 +2235,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  				     u32 alignment,
>  				     struct intel_ring_buffer *pipelined);
>  void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
> -int i915_gem_attach_phys_object(struct drm_device *dev,
> -				struct drm_i915_gem_object *obj,
> -				int id,
> +int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  				int align);
> -void i915_gem_detach_phys_object(struct drm_device *dev,
> -				 struct drm_i915_gem_object *obj);
> -void i915_gem_free_all_phys_object(struct drm_device *dev);
>  int i915_gem_open(struct drm_device *dev, struct drm_file *file);
>  void i915_gem_release(struct drm_device *dev, struct drm_file *file);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 764467ebade5..e302a23031fe 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -46,11 +46,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
>  static void
>  i915_gem_object_retire(struct drm_i915_gem_object *obj);
>  
> -static int i915_gem_phys_pwrite(struct drm_device *dev,
> -				struct drm_i915_gem_object *obj,
> -				struct drm_i915_gem_pwrite *args,
> -				struct drm_file *file);
> -
>  static void i915_gem_write_fence(struct drm_device *dev, int reg,
>  				 struct drm_i915_gem_object *obj);
>  static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
> @@ -212,6 +207,124 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +{
> +	drm_dma_handle_t *phys = obj->phys_handle;
> +
> +	if (!phys)
> +		return;
> +
> +	if (obj->madv == I915_MADV_WILLNEED) {
> +		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +		char *vaddr = phys->vaddr;
> +		int i;
> +
> +		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +			struct page *page = shmem_read_mapping_page(mapping, i);
> +			if (!IS_ERR(page)) {
> +				char *dst = kmap_atomic(page);
> +				memcpy(dst, vaddr, PAGE_SIZE);
> +				kunmap_atomic(dst);
> +
> +				drm_clflush_pages(&page, 1);

Could clflush while we already have it kmapped, to avoid the double kmap.
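
(i.e. something along these lines, flushing through the mapping already
taken for the copy; a sketch, not what the patch as posted does:)

	char *dst = kmap_atomic(page);
	memcpy(dst, vaddr, PAGE_SIZE);
	drm_clflush_virt_range(dst, PAGE_SIZE);
	kunmap_atomic(dst);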

> +
> +				set_page_dirty(page);
> +				mark_page_accessed(page);
> +				page_cache_release(page);
> +			}
> +			vaddr += PAGE_SIZE;
> +		}
> +		i915_gem_chipset_flush(obj->base.dev);
> +	}
> +
> +#ifdef CONFIG_X86
> +	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> +#endif
> +	drm_pci_free(obj->base.dev, phys);
> +	obj->phys_handle = NULL;
> +}
> +
> +int
> +i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
> +			    int align)
> +{
> +	drm_dma_handle_t *phys;
> +	struct address_space *mapping;
> +	char *vaddr;
> +	int i;
> +
> +	if (obj->phys_handle) {
> +		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> +			return -EBUSY;
> +
> +		return 0;
> +	}
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		return -EFAULT;
> +
> +	if (obj->base.filp == NULL)
> +		return -EINVAL;
> +
> +	/* create a new object */
> +	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
> +	if (!phys)
> +		return -ENOMEM;
> +
> +	vaddr = phys->vaddr;
> +#ifdef CONFIG_X86
> +	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> +#endif
> +	mapping = file_inode(obj->base.filp)->i_mapping;
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);

This is going to leak.
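
(i.e. the drm_pci_alloc() above, and the set_memory_wc() applied to it,
are not undone when shmem_read_mapping_page() fails. A hypothetical
sketch of one way to plug it:)

	page = shmem_read_mapping_page(mapping, i);
	if (IS_ERR(page)) {
		ret = PTR_ERR(page);
		goto err_phys;
	}
	...
err_phys:
#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
#endif
	drm_pci_free(obj->base.dev, phys);
	return ret;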

> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	obj->phys_handle = phys;
> +	return 0;
> +}
> +
> +static int
> +i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
> +		     struct drm_i915_gem_pwrite *args,
> +		     struct drm_file *file_priv)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	void *vaddr = obj->phys_handle->vaddr + args->offset;
> +	char __user *user_data = to_user_ptr(args->data_ptr);
> +
> +	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
> +		unsigned long unwritten;
> +
> +		/* The physical object once assigned is fixed for the lifetime
> +		 * of the obj, so we can safely drop the lock and continue
> +		 * to access vaddr.
> +		 */
> +		mutex_unlock(&dev->struct_mutex);
> +		unwritten = copy_from_user(vaddr, user_data, args->size);
> +		mutex_lock(&dev->struct_mutex);
> +		if (unwritten)
> +			return -EFAULT;

I was wondering why we don't clflush here, but then I realized the
destination is either wc or uc.

Apart from the potential leak, the rest of the patch looks OK to me.

-- 
Ville Syrjälä
Intel OTC

* Re: [PATCH 2/2] drm/i915: Make the physical object coherent with GTT
  2014-04-18  7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
  2014-04-22 20:03   ` Daniel Vetter
@ 2014-05-02 17:17   ` Ville Syrjälä
From: Ville Syrjälä @ 2014-05-02 17:17 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Fri, Apr 18, 2014 at 08:27:04AM +0100, Chris Wilson wrote:
> Currently objects for which the hardware needs a contiguous physical
> address are allocated a shadow backing store to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of, or by
> rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This allows the physical constraint of the GEM object to be transparent
> to userspace, which can then efficiently render into or update the
> object via the GTT and GPU.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_gem.c | 182 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  3 files changed, 129 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 12571aac7516..dd43fdc736ac 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1022,6 +1022,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>  	case I915_PARAM_CMD_PARSER_VERSION:
>  		value = i915_cmd_parser_get_version();
>  		break;
> +	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +		value = 1;
> +		break;
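
For reference, userspace would presumably probe this roughly like so (an
untested sketch; assumes libdrm's drmIoctl() wrapper plus the define this
patch adds to the uapi header):

	#include <xf86drm.h>
	#include <drm/i915_drm.h>

	static int has_coherent_phys_gtt(int fd)
	{
		drm_i915_getparam_t gp;
		int value = 0;

		gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT;
		gp.value = &value;

		/* older kernels reject unknown parameters with -EINVAL */
		if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0)
			return 0;

		return value;
	}
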
>  	default:
>  		DRM_DEBUG("Unknown parameter %d\n", param->param);
>  		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e302a23031fe..3eb5c07e1018 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -207,41 +207,129 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -	drm_dma_handle_t *phys = obj->phys_handle;
> +	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +	char *vaddr = obj->phys_handle->vaddr;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	int i;
>  
> -	if (!phys)
> -		return;
> +	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +		return -EINVAL;
> +
> +	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +		struct page *page;
> +		char *src;
> +
> +		page = shmem_read_mapping_page(mapping, i);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +
> +		src = kmap_atomic(page);
> +		memcpy(vaddr, src, PAGE_SIZE);
> +		kunmap_atomic(src);
> +
> +		page_cache_release(page);
> +		vaddr += PAGE_SIZE;
> +	}
> +
> +	drm_clflush_virt_range(obj->phys_handle->vaddr, obj->base.size);

No longer mapped wc/uc (the set_memory_wc() call is gone), so now we
have to clflush the linear vaddr explicitly.

> +	i915_gem_chipset_flush(obj->base.dev);
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL)
> +		return -ENOMEM;
> +
> +	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +		kfree(st);
> +		return -ENOMEM;
> +	}
> +
> +	sg = st->sgl;
> +	sg->offset = 0;
> +	sg->length = obj->base.size;
> +
> +	sg_dma_address(sg) = obj->phys_handle->busaddr;
> +	sg_dma_len(sg) = obj->base.size;
>  
> -	if (obj->madv == I915_MADV_WILLNEED) {
> +	obj->pages = st;
> +	obj->has_dma_mapping = true;
> +	return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +	int ret;
> +
> +	BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +	ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +	if (ret) {
> +		/* In the event of a disaster, abandon all caches and
> +		 * hope for the best.
> +		 */
> +		WARN_ON(ret != -EIO);
> +		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	}
> +
> +	if (obj->madv == I915_MADV_DONTNEED)
> +		obj->dirty = 0;
> +
> +	if (obj->dirty) {
>  		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -		char *vaddr = phys->vaddr;
> +		char *vaddr = obj->phys_handle->vaddr;
>  		int i;
>  
>  		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -			struct page *page = shmem_read_mapping_page(mapping, i);
> -			if (!IS_ERR(page)) {
> -				char *dst = kmap_atomic(page);
> -				memcpy(dst, vaddr, PAGE_SIZE);
> -				kunmap_atomic(dst);
> +			struct page *page;
> +			char *dst;
> +
> +			page = shmem_read_mapping_page(mapping, i);
> +			if (IS_ERR(page))
> +				continue;
>  
> -				drm_clflush_pages(&page, 1);
> +			dst = kmap_atomic(page);
> +			memcpy(dst, vaddr, PAGE_SIZE);
> +			kunmap_atomic(dst);
>  
> -				set_page_dirty(page);
> +			set_page_dirty(page);
> +			if (obj->madv == I915_MADV_WILLNEED)
>  				mark_page_accessed(page);
> -				page_cache_release(page);
> -			}
> +			page_cache_release(page);
>  			vaddr += PAGE_SIZE;
>  		}
> -		i915_gem_chipset_flush(obj->base.dev);
> +		obj->dirty = 0;
>  	}
>  
> -#ifdef CONFIG_X86
> -	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	drm_pci_free(obj->base.dev, phys);

This drm_pci_free() gets dropped without a replacement, so the
phys_handle is now leaked when the object is freed.

I suppose you should still keep i915_gem_object_detach_phys() around to
release it.
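
Something along these lines, perhaps (an untested sketch just to show
where the drm_pci_free() would go; the name and exact call site are
assumed from the old code):

	static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
	{
		drm_dma_handle_t *phys = obj->phys_handle;

		if (phys == NULL)
			return;

		/* write any dirty contents back via put_pages_phys() first */
		i915_gem_object_put_pages(obj);

		/* ... then actually release the contiguous allocation */
		drm_pci_free(obj->base.dev, phys);
		obj->phys_handle = NULL;
	}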

> -	obj->phys_handle = NULL;
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	obj->has_dma_mapping = false;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +	.get_pages = i915_gem_object_get_pages_phys,
> +	.put_pages = i915_gem_object_put_pages_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_vma *vma, *next;
> +	int ret;
> +
> +	drm_gem_object_reference(&obj->base);
> +	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +		if (i915_vma_unbind(vma))
> +			break;
> +
> +	ret = i915_gem_object_put_pages(obj);
> +	drm_gem_object_unreference(&obj->base);
> +
> +	return ret;
>  }
>  
>  int
> @@ -249,9 +337,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  			    int align)
>  {
>  	drm_dma_handle_t *phys;
> -	struct address_space *mapping;
> -	char *vaddr;
> -	int i;
> +	int ret;
>  
>  	if (obj->phys_handle) {
>  		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -266,36 +352,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>  	if (obj->base.filp == NULL)
>  		return -EINVAL;
>  
> +	ret = drop_pages(obj);
> +	if (ret)
> +		return ret;
> +
>  	/* create a new object */
>  	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>  	if (!phys)
>  		return -ENOMEM;
>  
> -	vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -	mapping = file_inode(obj->base.filp)->i_mapping;
> -	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -		struct page *page;
> -		char *src;
> -
> -		page = shmem_read_mapping_page(mapping, i);
> -		if (IS_ERR(page))
> -			return PTR_ERR(page);
> -
> -		src = kmap_atomic(page);
> -		memcpy(vaddr, src, PAGE_SIZE);
> -		kunmap_atomic(src);
> -
> -		mark_page_accessed(page);
> -		page_cache_release(page);
> -
> -		vaddr += PAGE_SIZE;
> -	}
> -
>  	obj->phys_handle = phys;
> -	return 0;
> +	obj->ops = &i915_gem_phys_ops;
> +
> +	return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -321,6 +390,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>  			return -EFAULT;
>  	}
>  
> +	drm_clflush_virt_range(vaddr, args->size);

And since the write now goes through a cached mapping, it needs to be
flushed out again before the hardware sees it. Check.

The patch looks sane enough to me, save for the phys_handle leak.

>  	i915_gem_chipset_flush(dev);
>  	return 0;
>  }
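
Side note: with the reordering below, a plain pwrite keeps working on
these objects; userspace needs nothing special (an untested sketch using
only the standard uapi):

	#include <stdint.h>
	#include <xf86drm.h>
	#include <drm/i915_drm.h>

	static int update_bo(int fd, uint32_t handle,
			     const void *data, uint64_t size)
	{
		struct drm_i915_gem_pwrite pwrite = {
			.handle   = handle,           /* GEM handle, e.g. the cursor bo */
			.offset   = 0,
			.size     = size,
			.data_ptr = (uintptr_t)data,  /* source buffer in userspace */
		};

		return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
	}

The kernel now tries the GTT fast path first and only falls back to
i915_gem_phys_pwrite() on -EFAULT/-ENOSPC.
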
> @@ -1184,11 +1254,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  	 * pread/pwrite currently are reading and writing from the CPU
>  	 * perspective, requiring manual detiling by the client.
>  	 */
> -	if (obj->phys_handle) {
> -		ret = i915_gem_phys_pwrite(obj, args, file);
> -		goto out;
> -	}
> -
>  	if (obj->tiling_mode == I915_TILING_NONE &&
>  	    (obj->base.filp == NULL ||
>  	     (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> @@ -1199,8 +1264,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>  		 * textures). Fallback to the shmem path in that case. */
>  	}
>  
> -	if (ret == -EFAULT || ret == -ENOSPC)
> -		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	if (ret == -EFAULT || ret == -ENOSPC) {
> +		if (obj->phys_handle)
> +			ret = i915_gem_phys_pwrite(obj, args, file);
> +		else
> +			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +	}
>  
>  out:
>  	drm_gem_object_unreference(&obj->base);
> @@ -3658,7 +3727,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	 * Stolen memory is always coherent with the GPU as it is explicitly
>  	 * marked as wc by the system, or the system is cache-coherent.
>  	 */
> -	if (obj->stolen)
> +	if (obj->stolen || obj->phys_handle)
>  		return false;
>  
>  	/* If the GPU is snooping the contents of the CPU cache,
> @@ -4518,7 +4587,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	i915_gem_object_put_pages(obj);
>  	i915_gem_object_free_mmap_offset(obj);
>  	i915_gem_object_release_stolen(obj);
> -	i915_gem_object_detach_phys(obj);
>  
>  	BUG_ON(obj->pages);
>  
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index beee13a53fbb..cf1425c9f54e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT     	 	 27
>  #define I915_PARAM_CMD_PARSER_VERSION	 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>  	int param;
> -- 
> 1.9.2
> 

-- 
Ville Syrjälä
Intel OTC


Thread overview: 6+ messages
2014-04-18  7:27 [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
2014-04-18  7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
2014-04-22 20:03   ` Daniel Vetter
2014-05-02 17:17   ` Ville Syrjälä
2014-05-01  8:32 ` [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
2014-05-02 17:12 ` Ville Syrjälä
