From mboxrd@z Thu Jan 1 00:00:00 1970 From: Chris Wilson Subject: [PATCH] drm/i915: Repeat unbinding during free if interrupted (v4) Date: Fri, 23 Jul 2010 18:39:27 +0100 Message-ID: <1279906767-10844-1-git-send-email-chris@chris-wilson.co.uk> References: <1279896884-29492-1-git-send-email-chris@chris-wilson.co.uk> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from fireflyinternet.com (server109-228-4-14.live-servers.net [109.228.4.14]) by gabe.freedesktop.org (Postfix) with ESMTP id E19A09E7B5 for ; Fri, 23 Jul 2010 10:56:01 -0700 (PDT) In-Reply-To: <1279896884-29492-1-git-send-email-chris@chris-wilson.co.uk> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: intel-gfx@lists.freedesktop.org Cc: stable@kernel.org List-Id: intel-gfx@lists.freedesktop.org If during the freeing of an object the unbind is interrupted by a system call, which is quite possible if we have outstanding GPU writes that must be flushed, the unbind is silently aborted. This still leaves the AGP region and backing pages allocated, and perhaps more importantly, the object remains upon the various lists exposing us to memory corruption. I think this is the cause behind the use-after-free, such as Bug 15664 - Graphics hang and kernel backtrace when starting Azureus with Compiz enabled https://bugzilla.kernel.org/show_bug.cgi?id=15664 v2: Daniel Vetter reminded me that kernel space programming is never easy. We cannot simply spin to clear the pending signal and so must defer the freeing of the object until later. v3: Run from the top level retire requests. 
v4: Tested with P(return -ERESTARTSYS)=.5 from i915_gem_do_wait_request() Signed-off-by: Chris Wilson Cc: stable@kernel.org Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 8 +++++ drivers/gpu/drm/i915/i915_gem.c | 57 ++++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0e7bf85..a66503c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -551,6 +551,14 @@ typedef struct drm_i915_private { struct list_head fence_list; /** + * List of objects currently pending being freed. + * + * These objects are no longer in use, but due to a signal + * we were prevented from freeing them at the appointed time. + */ + struct list_head deferred_free_list; + + /** * We leave the user IRQ off as much as possible, * but this means that requests will finish and never * be retired once the system goes idle. Set a timer to diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 78835f8..9424d9b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -52,6 +52,7 @@ static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file_priv); +static void i915_gem_free_object_tail(struct drm_gem_object *obj); static LIST_HEAD(shrink_list); static DEFINE_SPINLOCK(shrink_list_lock); @@ -1746,6 +1747,15 @@ i915_gem_retire_requests(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; + if (!list_empty(&dev_priv->mm.deferred_free_list)) { + struct drm_i915_gem_object *obj_priv, *tmp; + + list_for_each_entry_safe(obj_priv, tmp, + &dev_priv->mm.deferred_free_list, + list) + i915_gem_free_object_tail(&obj_priv->base); + } + i915_gem_retire_requests_ring(dev, &dev_priv->render_ring); if (HAS_BSD(dev)) 
i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring); @@ -1929,11 +1939,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj) * before we unbind. */ ret = i915_gem_object_set_to_cpu_domain(obj, 1); - if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("set_domain failed: %d\n", ret); + if (ret == -ERESTARTSYS) return ret; - } + /* Continue on if we fail due to EIO, the GPU is hung so we + * should be safe and we need to cleanup or else we might + * cause memory corruption through use-after-free. + */ BUG_ON(obj_priv->active); @@ -1967,7 +1978,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) trace_i915_gem_object_unbind(obj); - return 0; + return ret; } int @@ -4292,20 +4303,19 @@ int i915_gem_init_object(struct drm_gem_object *obj) return 0; } -void i915_gem_free_object(struct drm_gem_object *obj) +static void i915_gem_free_object_tail(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); + int ret; - trace_i915_gem_object_destroy(obj); - - while (obj_priv->pin_count > 0) - i915_gem_object_unpin(obj); - - if (obj_priv->phys_obj) - i915_gem_detach_phys_object(dev, obj); - - i915_gem_object_unbind(obj); + ret = i915_gem_object_unbind(obj); + if (ret == -ERESTARTSYS) { + list_move_tail(&obj_priv->list, + &dev_priv->mm.deferred_free_list); + return; + } if (obj_priv->mmap_offset) i915_gem_free_mmap_offset(obj); @@ -4317,6 +4327,22 @@ void i915_gem_free_object(struct drm_gem_object *obj) kfree(obj_priv); } +void i915_gem_free_object(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + struct drm_i915_gem_object *obj_priv = to_intel_bo(obj); + + trace_i915_gem_object_destroy(obj); + + while (obj_priv->pin_count > 0) + i915_gem_object_unpin(obj); + + if (obj_priv->phys_obj) + i915_gem_detach_phys_object(dev, obj); + + i915_gem_free_object_tail(obj); +} + int i915_gem_idle(struct drm_device *dev) { @@ -4555,6 +4581,7 @@ 
i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); + INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list); INIT_LIST_HEAD(&dev_priv->render_ring.active_list); INIT_LIST_HEAD(&dev_priv->render_ring.request_list); if (HAS_BSD(dev)) { -- 1.7.1