All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Repeat unbinding during free if interrupted
@ 2010-07-23 13:39 Chris Wilson
  2010-07-23 14:48 ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v2) Chris Wilson
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2010-07-23 13:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

If during the freeing of an object the unbind is interrupted a system
call, which is quite possible if we have outstanding GPU writes that
must be flused, the unbind is silently aborted. This still leaves the
AGP region and backing pages allocated, and perhaps more importantly,
the object remains upon the various lists exposing us to memory
corruption.

I think this is the cause behind the use-after-free, such as

  Bug 15664 - Graphics hang and kernel backtrace when starting Azureus
              with Compiz enabled
  https://bugzilla.kernel.org/show_bug.cgi?id=15664

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
---
 drivers/gpu/drm/i915/i915_gem.c |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3c7c0f7..a4c0b44 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4292,6 +4292,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	int ret;
 
 	trace_i915_gem_object_destroy(obj);
 
@@ -4301,7 +4302,9 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	if (obj_priv->phys_obj)
 		i915_gem_detach_phys_object(dev, obj);
 
-	i915_gem_object_unbind(obj);
+	do {
+	    ret = i915_gem_object_unbind(obj);
+	} while (ret == -ERESTARTSYS);
 
 	if (obj_priv->mmap_offset)
 		i915_gem_free_mmap_offset(obj);
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] drm/i915: Repeat unbinding during free if interrupted (v2)
  2010-07-23 13:39 [PATCH] drm/i915: Repeat unbinding during free if interrupted Chris Wilson
@ 2010-07-23 14:48 ` Chris Wilson
  2010-07-23 14:54   ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3) Chris Wilson
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2010-07-23 14:48 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

If during the freeing of an object the unbind is interrupted by a system
call, which is quite possible if we have outstanding GPU writes that
must be flushed, the unbind is silently aborted. This still leaves the
AGP region and backing pages allocated, and perhaps more importantly,
the object remains upon the various lists exposing us to memory
corruption.

I think this is the cause behind the use-after-free, such as

  Bug 15664 - Graphics hang and kernel backtrace when starting Azureus
              with Compiz enabled
  https://bugzilla.kernel.org/show_bug.cgi?id=15664

v2: Daniel Vetter reminded me that kernel space programming is never easy.
We cannot simply spin to clear the pending signal and so must deferred
the freeing of the object until later.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
---
 drivers/gpu/drm/i915/i915_drv.h |    8 +++++
 drivers/gpu/drm/i915/i915_gem.c |   56 ++++++++++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0e7bf85..a66503c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -551,6 +551,14 @@ typedef struct drm_i915_private {
 		struct list_head fence_list;
 
 		/**
+		 * List of objects currently pending being freed.
+		 *
+		 * These objects are no longer in use, but due to a signal
+		 * we were prevented from freeing them at the appointed time.
+		 */
+		struct list_head deferred_free_list;
+
+		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
 		 * be retired once the system goes idle. Set a timer to
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3c7c0f7..28a71e4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,6 +52,7 @@ static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
 				struct drm_file *file_priv);
+static void i915_gem_free_object_tail(struct drm_gem_object *obj);
 
 static LIST_HEAD(shrink_list);
 static DEFINE_SPINLOCK(shrink_list_lock);
@@ -1707,6 +1708,15 @@ i915_gem_retire_requests_ring(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	uint32_t seqno;
 
+	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
+	    struct drm_i915_gem_object *obj_priv, *tmp;
+
+	    list_for_each_entry_safe(obj_priv, tmp,
+				     &dev_priv->mm.deferred_free_list,
+				     list)
+		i915_gem_free_object_tail(&obj_priv->base);
+	}
+
 	if (!ring->status_page.page_addr
 			|| list_empty(&ring->request_list))
 		return;
@@ -1929,11 +1939,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	 * before we unbind.
 	 */
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("set_domain failed: %d\n", ret);
+	if (ret == -ERESTARTSYS)
 		return ret;
-	}
+	/* Continue on if we fail due to EIO, the GPU is hung so we
+	 * should be safe and we need to cleanup or else we might
+	 * cause memory corruption through use-after-free.
+	 */
 
 	BUG_ON(obj_priv->active);
 
@@ -1967,7 +1978,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	trace_i915_gem_object_unbind(obj);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -4288,20 +4299,19 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	return 0;
 }
 
-void i915_gem_free_object(struct drm_gem_object *obj)
+static void i915_gem_free_object_tail(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	int ret;
 
-	trace_i915_gem_object_destroy(obj);
-
-	while (obj_priv->pin_count > 0)
-		i915_gem_object_unpin(obj);
-
-	if (obj_priv->phys_obj)
-		i915_gem_detach_phys_object(dev, obj);
-
-	i915_gem_object_unbind(obj);
+	ret = i915_gem_object_unbind(obj);
+	if (ret == -ERESTARTSYS) {
+		list_move_tail(&obj_priv->list,
+			       &dev_priv->mm.deferred_free_list);
+		return;
+	}
 
 	if (obj_priv->mmap_offset)
 		i915_gem_free_mmap_offset(obj);
@@ -4313,6 +4323,22 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	kfree(obj_priv);
 }
 
+void i915_gem_free_object(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+
+	trace_i915_gem_object_destroy(obj);
+
+	while (obj_priv->pin_count > 0)
+		i915_gem_object_unpin(obj);
+
+	if (obj_priv->phys_obj)
+		i915_gem_detach_phys_object(dev, obj);
+
+	i915_gem_free_object_tail(obj);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3)
  2010-07-23 14:48 ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v2) Chris Wilson
@ 2010-07-23 14:54   ` Chris Wilson
  2010-07-23 15:25     ` Daniel Vetter
                       ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Chris Wilson @ 2010-07-23 14:54 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

If during the freeing of an object the unbind is interrupted by a system
call, which is quite possible if we have outstanding GPU writes that
must be flushed, the unbind is silently aborted. This still leaves the
AGP region and backing pages allocated, and perhaps more importantly,
the object remains upon the various lists exposing us to memory
corruption.

I think this is the cause behind the use-after-free, such as

  Bug 15664 - Graphics hang and kernel backtrace when starting Azureus
              with Compiz enabled
  https://bugzilla.kernel.org/show_bug.cgi?id=15664

v2: Daniel Vetter reminded me that kernel space programming is never easy.
We cannot simply spin to clear the pending signal and so must deferred
the freeing of the object until later.
v3: Run from the top level retire requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
---
 drivers/gpu/drm/i915/i915_drv.h |    8 +++++
 drivers/gpu/drm/i915/i915_gem.c |   56 ++++++++++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0e7bf85..a66503c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -551,6 +551,14 @@ typedef struct drm_i915_private {
 		struct list_head fence_list;
 
 		/**
+		 * List of objects currently pending being freed.
+		 *
+		 * These objects are no longer in use, but due to a signal
+		 * we were prevented from freeing them at the appointed time.
+		 */
+		struct list_head deferred_free_list;
+
+		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
 		 * be retired once the system goes idle. Set a timer to
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3c7c0f7..1f2c5f5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,6 +52,7 @@ static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
 				struct drm_file *file_priv);
+static void i915_gem_free_object_tail(struct drm_gem_object *obj);
 
 static LIST_HEAD(shrink_list);
 static DEFINE_SPINLOCK(shrink_list_lock);
@@ -1746,6 +1747,15 @@ i915_gem_retire_requests(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
+	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
+	    struct drm_i915_gem_object *obj_priv, *tmp;
+
+	    list_for_each_entry_safe(obj_priv, tmp,
+				     &dev_priv->mm.deferred_free_list,
+				     list)
+		i915_gem_free_object_tail(&obj_priv->base);
+	}
+
 	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
 	if (HAS_BSD(dev))
 		i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
@@ -1929,11 +1939,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	 * before we unbind.
 	 */
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("set_domain failed: %d\n", ret);
+	if (ret == -ERESTARTSYS)
 		return ret;
-	}
+	/* Continue on if we fail due to EIO, the GPU is hung so we
+	 * should be safe and we need to cleanup or else we might
+	 * cause memory corruption through use-after-free.
+	 */
 
 	BUG_ON(obj_priv->active);
 
@@ -1967,7 +1978,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	trace_i915_gem_object_unbind(obj);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -4288,20 +4299,19 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	return 0;
 }
 
-void i915_gem_free_object(struct drm_gem_object *obj)
+static void i915_gem_free_object_tail(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	int ret;
 
-	trace_i915_gem_object_destroy(obj);
-
-	while (obj_priv->pin_count > 0)
-		i915_gem_object_unpin(obj);
-
-	if (obj_priv->phys_obj)
-		i915_gem_detach_phys_object(dev, obj);
-
-	i915_gem_object_unbind(obj);
+	ret = i915_gem_object_unbind(obj);
+	if (ret == -ERESTARTSYS) {
+		list_move_tail(&obj_priv->list,
+			       &dev_priv->mm.deferred_free_list);
+		return;
+	}
 
 	if (obj_priv->mmap_offset)
 		i915_gem_free_mmap_offset(obj);
@@ -4313,6 +4323,22 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	kfree(obj_priv);
 }
 
+void i915_gem_free_object(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+
+	trace_i915_gem_object_destroy(obj);
+
+	while (obj_priv->pin_count > 0)
+		i915_gem_object_unpin(obj);
+
+	if (obj_priv->phys_obj)
+		i915_gem_detach_phys_object(dev, obj);
+
+	i915_gem_free_object_tail(obj);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3)
  2010-07-23 14:54   ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3) Chris Wilson
@ 2010-07-23 15:25     ` Daniel Vetter
  2010-07-23 17:39     ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v4) Chris Wilson
  2010-07-23 18:20     ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v5) Chris Wilson
  2 siblings, 0 replies; 6+ messages in thread
From: Daniel Vetter @ 2010-07-23 15:25 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx, stable

On Fri, Jul 23, 2010 at 03:54:44PM +0100, Chris Wilson wrote:
> If during the freeing of an object the unbind is interrupted by a system
> call, which is quite possible if we have outstanding GPU writes that
> must be flushed, the unbind is silently aborted. This still leaves the
> AGP region and backing pages allocated, and perhaps more importantly,
> the object remains upon the various lists exposing us to memory
> corruption.
> 
> I think this is the cause behind the use-after-free, such as
> 
>   Bug 15664 - Graphics hang and kernel backtrace when starting Azureus
>               with Compiz enabled
>   https://bugzilla.kernel.org/show_bug.cgi?id=15664
> 
> v2: Daniel Vetter reminded me that kernel space programming is never easy.
> We cannot simply spin to clear the pending signal and so must deferred
> the freeing of the object until later.
> v3: Run from the top level retire requests.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: stable@kernel.org

Cleaning up the deferred free list in retire_request looks much saner than
what I've had in mind when discussing this on irc.

Reviewed-By: Daniel Vetter <daniel@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] drm/i915: Repeat unbinding during free if interrupted (v4)
  2010-07-23 14:54   ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3) Chris Wilson
  2010-07-23 15:25     ` Daniel Vetter
@ 2010-07-23 17:39     ` Chris Wilson
  2010-07-23 18:20     ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v5) Chris Wilson
  2 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2010-07-23 17:39 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

If during the freeing of an object the unbind is interrupted by a system
call, which is quite possible if we have outstanding GPU writes that
must be flushed, the unbind is silently aborted. This still leaves the
AGP region and backing pages allocated, and perhaps more importantly,
the object remains upon the various lists exposing us to memory
corruption.

I think this is the cause behind the use-after-free, such as

  Bug 15664 - Graphics hang and kernel backtrace when starting Azureus
              with Compiz enabled
  https://bugzilla.kernel.org/show_bug.cgi?id=15664

v2: Daniel Vetter reminded me that kernel space programming is never easy.
We cannot simply spin to clear the pending signal and so must deferred
the freeing of the object until later.
v3: Run from the top level retire requests.
v4: Tested with P(return -ERESTARTSYS)=.5 from i915_gem_do_wait_request()

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
Reviewed-by: Daniel Vetter <daniel@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.h |    8 +++++
 drivers/gpu/drm/i915/i915_gem.c |   57 ++++++++++++++++++++++++++++----------
 2 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0e7bf85..a66503c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -551,6 +551,14 @@ typedef struct drm_i915_private {
 		struct list_head fence_list;
 
 		/**
+		 * List of objects currently pending being freed.
+		 *
+		 * These objects are no longer in use, but due to a signal
+		 * we were prevented from freeing them at the appointed time.
+		 */
+		struct list_head deferred_free_list;
+
+		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
 		 * be retired once the system goes idle. Set a timer to
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 78835f8..9424d9b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -52,6 +52,7 @@ static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
 				struct drm_file *file_priv);
+static void i915_gem_free_object_tail(struct drm_gem_object *obj);
 
 static LIST_HEAD(shrink_list);
 static DEFINE_SPINLOCK(shrink_list_lock);
@@ -1746,6 +1747,15 @@ i915_gem_retire_requests(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
+	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
+	    struct drm_i915_gem_object *obj_priv, *tmp;
+
+	    list_for_each_entry_safe(obj_priv, tmp,
+				     &dev_priv->mm.deferred_free_list,
+				     list)
+		i915_gem_free_object_tail(&obj_priv->base);
+	}
+
 	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
 	if (HAS_BSD(dev))
 		i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
@@ -1929,11 +1939,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	 * before we unbind.
 	 */
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("set_domain failed: %d\n", ret);
+	if (ret == -ERESTARTSYS)
 		return ret;
-	}
+	/* Continue on if we fail due to EIO, the GPU is hung so we
+	 * should be safe and we need to cleanup or else we might
+	 * cause memory corruption through use-after-free.
+	 */
 
 	BUG_ON(obj_priv->active);
 
@@ -1967,7 +1978,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	trace_i915_gem_object_unbind(obj);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -4292,20 +4303,19 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	return 0;
 }
 
-void i915_gem_free_object(struct drm_gem_object *obj)
+static void i915_gem_free_object_tail(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	int ret;
 
-	trace_i915_gem_object_destroy(obj);
-
-	while (obj_priv->pin_count > 0)
-		i915_gem_object_unpin(obj);
-
-	if (obj_priv->phys_obj)
-		i915_gem_detach_phys_object(dev, obj);
-
-	i915_gem_object_unbind(obj);
+	ret = i915_gem_object_unbind(obj);
+	if (ret == -ERESTARTSYS) {
+		list_move_tail(&obj_priv->list,
+			       &dev_priv->mm.deferred_free_list);
+		return;
+	}
 
 	if (obj_priv->mmap_offset)
 		i915_gem_free_mmap_offset(obj);
@@ -4317,6 +4327,22 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	kfree(obj_priv);
 }
 
+void i915_gem_free_object(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+
+	trace_i915_gem_object_destroy(obj);
+
+	while (obj_priv->pin_count > 0)
+		i915_gem_object_unpin(obj);
+
+	if (obj_priv->phys_obj)
+		i915_gem_detach_phys_object(dev, obj);
+
+	i915_gem_free_object_tail(obj);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
@@ -4555,6 +4581,7 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+	INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
 	INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
 	INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
 	if (HAS_BSD(dev)) {
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH] drm/i915: Repeat unbinding during free if interrupted (v5)
  2010-07-23 14:54   ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3) Chris Wilson
  2010-07-23 15:25     ` Daniel Vetter
  2010-07-23 17:39     ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v4) Chris Wilson
@ 2010-07-23 18:20     ` Chris Wilson
  2 siblings, 0 replies; 6+ messages in thread
From: Chris Wilson @ 2010-07-23 18:20 UTC (permalink / raw)
  To: intel-gfx; +Cc: stable

If during the freeing of an object the unbind is interrupted by a system
call, which is quite possible if we have outstanding GPU writes that
must be flushed, the unbind is silently aborted. This still leaves the
AGP region and backing pages allocated, and perhaps more importantly,
the object remains upon the various lists exposing us to memory
corruption.

I think this is the cause behind the use-after-free, such as

  Bug 15664 - Graphics hang and kernel backtrace when starting Azureus
              with Compiz enabled
  https://bugzilla.kernel.org/show_bug.cgi?id=15664

v2: Daniel Vetter reminded me that kernel space programming is never easy.
We cannot simply spin to clear the pending signal and so must deferred
the freeing of the object until later.
v3: Run from the top level retire requests.
v4: Tested with P(return -ERESTARTSYS)=.5 from i915_gem_do_wait_request()
v5: Rebase against Eric's for-linus tree.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@kernel.org
Cc: Daniel Vetter <daniel@ffwll.ch>
---
 drivers/gpu/drm/i915/i915_drv.c |    2 +-
 drivers/gpu/drm/i915/i915_drv.h |   11 ++++-
 drivers/gpu/drm/i915/i915_gem.c |   96 ++++++++++++++++++++++++---------------
 3 files changed, 70 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 119692f..5044f65 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -340,7 +340,7 @@ int i965_reset(struct drm_device *dev, u8 flags)
 	/*
 	 * Clear request list
 	 */
-	i915_gem_retire_requests(dev, &dev_priv->render_ring);
+	i915_gem_retire_requests(dev);
 
 	if (need_display)
 		i915_save_display(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5a0100e..e3ff35e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -547,6 +547,14 @@ typedef struct drm_i915_private {
 		struct list_head fence_list;
 
 		/**
+		 * List of objects currently pending being freed.
+		 *
+		 * These objects are no longer in use, but due to a signal
+		 * we were prevented from freeing them at the appointed time.
+		 */
+		struct list_head deferred_free_list;
+
+		/**
 		 * We leave the user IRQ off as much as possible,
 		 * but this means that requests will finish and never
 		 * be retired once the system goes idle. Set a timer to
@@ -955,8 +963,7 @@ uint32_t i915_get_gem_seqno(struct drm_device *dev,
 bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
 int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
 int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
-void i915_gem_retire_requests(struct drm_device *dev,
-		 struct intel_ring_buffer *ring);
+void i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_work_handler(struct work_struct *work);
 void i915_gem_clflush_object(struct drm_gem_object *obj);
 int i915_gem_object_set_domain(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2d0e109..6a371e3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -53,6 +53,7 @@ static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
 				struct drm_file *file_priv);
+static void i915_gem_free_object_tail(struct drm_gem_object *obj);
 
 static LIST_HEAD(shrink_list);
 static DEFINE_SPINLOCK(shrink_list_lock);
@@ -1709,9 +1710,9 @@ i915_get_gem_seqno(struct drm_device *dev,
 /**
  * This function clears the request list as sequence numbers are passed.
  */
-void
-i915_gem_retire_requests(struct drm_device *dev,
-		struct intel_ring_buffer *ring)
+static void
+i915_gem_retire_requests_ring(struct drm_device *dev,
+			      struct intel_ring_buffer *ring)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	uint32_t seqno;
@@ -1751,6 +1752,25 @@ i915_gem_retire_requests(struct drm_device *dev,
 }
 
 void
+i915_gem_retire_requests(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+
+	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
+	    struct drm_i915_gem_object *obj_priv, *tmp;
+
+	    list_for_each_entry_safe(obj_priv, tmp,
+				     &dev_priv->mm.deferred_free_list,
+				     list)
+		i915_gem_free_object_tail(&obj_priv->base);
+	}
+
+	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
+	if (HAS_BSD(dev))
+		i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
+}
+
+void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv;
@@ -1761,10 +1781,7 @@ i915_gem_retire_work_handler(struct work_struct *work)
 	dev = dev_priv->dev;
 
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_retire_requests(dev, &dev_priv->render_ring);
-
-	if (HAS_BSD(dev))
-		i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
+	i915_gem_retire_requests(dev);
 
 	if (!dev_priv->mm.suspended &&
 		(!list_empty(&dev_priv->render_ring.request_list) ||
@@ -1832,7 +1849,7 @@ i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
 	 * a separate wait queue to handle that.
 	 */
 	if (ret == 0)
-		i915_gem_retire_requests(dev, ring);
+		i915_gem_retire_requests_ring(dev, ring);
 
 	return ret;
 }
@@ -1945,11 +1962,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 	 * before we unbind.
 	 */
 	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-	if (ret) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("set_domain failed: %d\n", ret);
+	if (ret == -ERESTARTSYS)
 		return ret;
-	}
+	/* Continue on if we fail due to EIO, the GPU is hung so we
+	 * should be safe and we need to cleanup or else we might
+	 * cause memory corruption through use-after-free.
+	 */
 
 	BUG_ON(obj_priv->active);
 
@@ -1985,7 +2003,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	trace_i915_gem_object_unbind(obj);
 
-	return 0;
+	return ret;
 }
 
 static struct drm_gem_object *
@@ -2107,10 +2125,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size)
 	struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
 	struct intel_ring_buffer *bsd_ring = &dev_priv->bsd_ring;
 	for (;;) {
-		i915_gem_retire_requests(dev, render_ring);
-
-		if (HAS_BSD(dev))
-			i915_gem_retire_requests(dev, bsd_ring);
+		i915_gem_retire_requests(dev);
 
 		/* If there's an inactive buffer available now, grab it
 		 * and be done.
@@ -4329,7 +4344,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_busy *args = data;
 	struct drm_gem_object *obj;
 	struct drm_i915_gem_object *obj_priv;
-	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
 	if (obj == NULL) {
@@ -4344,10 +4358,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * actually unmasked, and our working set ends up being larger than
 	 * required.
 	 */
-	i915_gem_retire_requests(dev, &dev_priv->render_ring);
-
-	if (HAS_BSD(dev))
-		i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
+	i915_gem_retire_requests(dev);
 
 	obj_priv = to_intel_bo(obj);
 	/* Don't count being on the flushing list against the object being
@@ -4457,20 +4468,19 @@ int i915_gem_init_object(struct drm_gem_object *obj)
 	return 0;
 }
 
-void i915_gem_free_object(struct drm_gem_object *obj)
+static void i915_gem_free_object_tail(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	int ret;
 
-	trace_i915_gem_object_destroy(obj);
-
-	while (obj_priv->pin_count > 0)
-		i915_gem_object_unpin(obj);
-
-	if (obj_priv->phys_obj)
-		i915_gem_detach_phys_object(dev, obj);
-
-	i915_gem_object_unbind(obj);
+	ret = i915_gem_object_unbind(obj);
+	if (ret == -ERESTARTSYS) {
+		list_move_tail(&obj_priv->list,
+			       &dev_priv->mm.deferred_free_list);
+		return;
+	}
 
 	if (obj_priv->mmap_offset)
 		i915_gem_free_mmap_offset(obj);
@@ -4482,6 +4492,22 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	kfree(obj_priv);
 }
 
+void i915_gem_free_object(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+
+	trace_i915_gem_object_destroy(obj);
+
+	while (obj_priv->pin_count > 0)
+		i915_gem_object_unpin(obj);
+
+	if (obj_priv->phys_obj)
+		i915_gem_detach_phys_object(dev, obj);
+
+	i915_gem_free_object_tail(obj);
+}
+
 /** Unbinds all inactive objects. */
 static int
 i915_gem_evict_from_inactive_list(struct drm_device *dev)
@@ -4755,6 +4781,7 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
 	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
+	INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
 	INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
 	INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
 	if (HAS_BSD(dev)) {
@@ -5043,10 +5070,7 @@ rescan:
 			continue;
 
 		spin_unlock(&shrink_list_lock);
-		i915_gem_retire_requests(dev, &dev_priv->render_ring);
-
-		if (HAS_BSD(dev))
-			i915_gem_retire_requests(dev, &dev_priv->bsd_ring);
+		i915_gem_retire_requests(dev);
 
 		list_for_each_entry_safe(obj_priv, next_obj,
 					 &dev_priv->mm.inactive_list,
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-07-23 18:21 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-23 13:39 [PATCH] drm/i915: Repeat unbinding during free if interrupted Chris Wilson
2010-07-23 14:48 ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v2) Chris Wilson
2010-07-23 14:54   ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v3) Chris Wilson
2010-07-23 15:25     ` Daniel Vetter
2010-07-23 17:39     ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v4) Chris Wilson
2010-07-23 18:20     ` [PATCH] drm/i915: Repeat unbinding during free if interrupted (v5) Chris Wilson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.