All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/vmwgfx: Fix gem refcounting and memory evictions
@ 2022-04-20  4:03 Zack Rusin
       [not found] ` <CAKPuMhMdrDK7K6c3C_8CaOB++uQJun_RiHuSHtRYa8eJNPJLUg@mail.gmail.com>
  0 siblings, 1 reply; 2+ messages in thread
From: Zack Rusin @ 2022-04-20  4:03 UTC (permalink / raw)
  To: dri-devel; +Cc: krastevm, mombasawalam

From: Zack Rusin <zackr@vmware.com>

v2: Add the last part of the ref count fix which was spotted by
Philipp Sieweck where the ref count of cpu writers is off due to
ERESTARTSYS or EBUSY during bo waits.

The initial GEM port broke refcounting on shareable (prime) surfaces and
memory evictions. The prime surfaces broke because the parent surfaces
weren't increasing the ref count on GEM surfaces, which meant that
the memory backing textures could have been deleted while the texture
was still accessible. The evictions broke due to a typo, the code was
supposed to exit if the passed buffers were not vmw_buffer_object
not if they were. They're tied because the evictions depend on having
memory to actually evict.

This fixes crashes with XA state tracker which is used for xrender
acceleration on xf86-video-vmware, apps/tests which use a lot of
memory (a good test being the piglit's streaming-texture-leak) and
desktops.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Fixes: 8afa13a0583f ("drm/vmwgfx: Implement DRIVER_GEM")
Reported-by: Philipp Sieweck <psi@informatik.uni-kiel.de>
Cc: <stable@vger.kernel.org> # v5.17+
Reviewed-by: Maaz Mombasawala <mombasawalam@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 43 ++++++++++++-------------
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     |  8 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c |  7 +++-
 3 files changed, 28 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 31aecc46624b..04c8a378aeed 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -46,6 +46,21 @@ vmw_buffer_object(struct ttm_buffer_object *bo)
 	return container_of(bo, struct vmw_buffer_object, base);
 }
 
+/**
+ * bo_is_vmw - check if the buffer object is a &vmw_buffer_object
+ * @bo: ttm buffer object to be checked
+ *
+ * Uses destroy function associated with the object to determine if this is
+ * a &vmw_buffer_object.
+ *
+ * Returns:
+ * true if the object is of &vmw_buffer_object type, false if not.
+ */
+static bool bo_is_vmw(struct ttm_buffer_object *bo)
+{
+	return bo->destroy == &vmw_bo_bo_free ||
+	       bo->destroy == &vmw_gem_destroy;
+}
 
 /**
  * vmw_bo_pin_in_placement - Validate a buffer to placement.
@@ -615,8 +630,9 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
 
 		ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
 		vmw_bo_unreference(&vbo);
-		if (unlikely(ret != 0 && ret != -ERESTARTSYS &&
-			     ret != -EBUSY)) {
+		if (unlikely(ret != 0)) {
+			if (ret == -ERESTARTSYS || ret == -EBUSY)
+				return -EBUSY;
 			DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n",
 				  (unsigned int) arg->handle);
 			return ret;
@@ -798,7 +814,7 @@ int vmw_dumb_create(struct drm_file *file_priv,
 void vmw_bo_swap_notify(struct ttm_buffer_object *bo)
 {
 	/* Is @bo embedded in a struct vmw_buffer_object? */
-	if (vmw_bo_is_vmw_bo(bo))
+	if (!bo_is_vmw(bo))
 		return;
 
 	/* Kill any cached kernel maps before swapout */
@@ -822,7 +838,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
 	struct vmw_buffer_object *vbo;
 
 	/* Make sure @bo is embedded in a struct vmw_buffer_object? */
-	if (vmw_bo_is_vmw_bo(bo))
+	if (!bo_is_vmw(bo))
 		return;
 
 	vbo = container_of(bo, struct vmw_buffer_object, base);
@@ -843,22 +859,3 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
 	if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB)
 		vmw_resource_unbind_list(vbo);
 }
-
-/**
- * vmw_bo_is_vmw_bo - check if the buffer object is a &vmw_buffer_object
- * @bo: buffer object to be checked
- *
- * Uses destroy function associated with the object to determine if this is
- * a &vmw_buffer_object.
- *
- * Returns:
- * true if the object is of &vmw_buffer_object type, false if not.
- */
-bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo)
-{
-	if (bo->destroy == &vmw_bo_bo_free ||
-	    bo->destroy == &vmw_gem_destroy)
-		return true;
-
-	return false;
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 72a17618ba0a..0c12faa4e533 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1018,13 +1018,10 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id)
 		goto out_no_fman;
 	}
 
-	drm_vma_offset_manager_init(&dev_priv->vma_manager,
-				    DRM_FILE_PAGE_OFFSET_START,
-				    DRM_FILE_PAGE_OFFSET_SIZE);
 	ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver,
 			      dev_priv->drm.dev,
 			      dev_priv->drm.anon_inode->i_mapping,
-			      &dev_priv->vma_manager,
+			      dev_priv->drm.vma_offset_manager,
 			      dev_priv->map_mode == vmw_dma_alloc_coherent,
 			      false);
 	if (unlikely(ret != 0)) {
@@ -1195,7 +1192,6 @@ static void vmw_driver_unload(struct drm_device *dev)
 	vmw_devcaps_destroy(dev_priv);
 	vmw_vram_manager_fini(dev_priv);
 	ttm_device_fini(&dev_priv->bdev);
-	drm_vma_offset_manager_destroy(&dev_priv->vma_manager);
 	vmw_release_device_late(dev_priv);
 	vmw_fence_manager_takedown(dev_priv->fman);
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
@@ -1419,7 +1415,7 @@ vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
 	struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
 
 	return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
-				     &dev_priv->vma_manager);
+				     dev_priv->drm.vma_offset_manager);
 }
 
 static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 00e8e27e4884..ace7ca150b03 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -683,6 +683,9 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base)
 	    container_of(base, struct vmw_user_surface, prime.base);
 	struct vmw_resource *res = &user_srf->srf.res;
 
+	if (base->shareable && res && res->backup)
+		drm_gem_object_put(&res->backup->base.base);
+
 	*p_base = NULL;
 	vmw_resource_unreference(&res);
 }
@@ -857,6 +860,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			goto out_unlock;
 		}
 		vmw_bo_reference(res->backup);
+		drm_gem_object_get(&res->backup->base.base);
 	}
 
 	tmp = vmw_resource_reference(&srf->res);
@@ -1513,7 +1517,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
 							&res->backup);
 		if (ret == 0)
 			vmw_bo_reference(res->backup);
-
 	}
 
 	if (unlikely(ret != 0)) {
@@ -1561,6 +1564,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
 			drm_vma_node_offset_addr(&res->backup->base.base.vma_node);
 		rep->buffer_size = res->backup->base.base.size;
 		rep->buffer_handle = backup_handle;
+		if (user_srf->prime.base.shareable)
+			drm_gem_object_get(&res->backup->base.base);
 	} else {
 		rep->buffer_map_handle = 0;
 		rep->buffer_size = 0;
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] drm/vmwgfx: Fix gem refcounting and memory evictions
       [not found] ` <CAKPuMhMdrDK7K6c3C_8CaOB++uQJun_RiHuSHtRYa8eJNPJLUg@mail.gmail.com>
@ 2022-04-20 18:46   ` Martin Krastev (VMware)
  0 siblings, 0 replies; 2+ messages in thread
From: Martin Krastev (VMware) @ 2022-04-20 18:46 UTC (permalink / raw)
  To: dri-devel; +Cc: mombasawalam

From: Martin Krastev <krastevm@vmware.com>


> From: Zack Rusin <zack@kde.org>
> Date: Wed, 20 Apr 2022 at 07:03
> Subject: [PATCH v2] drm/vmwgfx: Fix gem refcounting and memory evictions
> To: <dri-devel@lists.freedesktop.org>
> Cc: <krastevm@vmware.com>, <mombasawalam@vmware.com>
>
>
> From: Zack Rusin <zackr@vmware.com>
>
> v2: Add the last part of the ref count fix which was spotted by
> Philipp Sieweck where the ref count of cpu writers is off due to
> ERESTARTSYS or EBUSY during bo waits.
>
> The initial GEM port broke refcounting on shareable (prime) surfaces and
> memory evictions. The prime surfaces broke because the parent surfaces
> weren't increasing the ref count on GEM surfaces, which meant that
> the memory backing textures could have been deleted while the texture
> was still accessible. The evictions broke due to a typo, the code was
> supposed to exit if the passed buffers were not vmw_buffer_object
> not if they were. They're tied because the evictions depend on having
> memory to actually evict.
>
> This fixes crashes with XA state tracker which is used for xrender
> acceleration on xf86-video-vmware, apps/tests which use a lot of
> memory (a good test being the piglit's streaming-texture-leak) and
> desktops.
>
> Signed-off-by: Zack Rusin <zackr@vmware.com>
> Fixes: 8afa13a0583f ("drm/vmwgfx: Implement DRIVER_GEM")
> Reported-by: Philipp Sieweck <psi@informatik.uni-kiel.de>
> Cc: <stable@vger.kernel.org> # v5.17+
> Reviewed-by: Maaz Mombasawala <mombasawalam@vmware.com>
> ---
>   drivers/gpu/drm/vmwgfx/vmwgfx_bo.c      | 43 ++++++++++++-------------
>   drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     |  8 ++---
>   drivers/gpu/drm/vmwgfx/vmwgfx_surface.c |  7 +++-
>   3 files changed, 28 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index 31aecc46624b..04c8a378aeed 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -46,6 +46,21 @@ vmw_buffer_object(struct ttm_buffer_object *bo)
>          return container_of(bo, struct vmw_buffer_object, base);
>   }
>
> +/**
> + * bo_is_vmw - check if the buffer object is a &vmw_buffer_object
> + * @bo: ttm buffer object to be checked
> + *
> + * Uses destroy function associated with the object to determine if this is
> + * a &vmw_buffer_object.
> + *
> + * Returns:
> + * true if the object is of &vmw_buffer_object type, false if not.
> + */
> +static bool bo_is_vmw(struct ttm_buffer_object *bo)
> +{
> +       return bo->destroy == &vmw_bo_bo_free ||
> +              bo->destroy == &vmw_gem_destroy;
> +}
>
>   /**
>    * vmw_bo_pin_in_placement - Validate a buffer to placement.
> @@ -615,8 +630,9 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device
> *dev, void *data,
>
>                  ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
>                  vmw_bo_unreference(&vbo);
> -               if (unlikely(ret != 0 && ret != -ERESTARTSYS &&
> -                            ret != -EBUSY)) {
> +               if (unlikely(ret != 0)) {
> +                       if (ret == -ERESTARTSYS || ret == -EBUSY)
> +                               return -EBUSY;
>                          DRM_ERROR("Failed synccpu grab on handle 0x%08x.\n",
>                                    (unsigned int) arg->handle);
>                          return ret;
> @@ -798,7 +814,7 @@ int vmw_dumb_create(struct drm_file *file_priv,
>   void vmw_bo_swap_notify(struct ttm_buffer_object *bo)
>   {
>          /* Is @bo embedded in a struct vmw_buffer_object? */
> -       if (vmw_bo_is_vmw_bo(bo))
> +       if (!bo_is_vmw(bo))
>                  return;
>
>          /* Kill any cached kernel maps before swapout */
> @@ -822,7 +838,7 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
>          struct vmw_buffer_object *vbo;
>
>          /* Make sure @bo is embedded in a struct vmw_buffer_object? */
> -       if (vmw_bo_is_vmw_bo(bo))
> +       if (!bo_is_vmw(bo))
>                  return;
>
>          vbo = container_of(bo, struct vmw_buffer_object, base);
> @@ -843,22 +859,3 @@ void vmw_bo_move_notify(struct ttm_buffer_object *bo,
>          if (mem->mem_type != VMW_PL_MOB && bo->resource->mem_type == VMW_PL_MOB)
>                  vmw_resource_unbind_list(vbo);
>   }
> -
> -/**
> - * vmw_bo_is_vmw_bo - check if the buffer object is a &vmw_buffer_object
> - * @bo: buffer object to be checked
> - *
> - * Uses destroy function associated with the object to determine if this is
> - * a &vmw_buffer_object.
> - *
> - * Returns:
> - * true if the object is of &vmw_buffer_object type, false if not.
> - */
> -bool vmw_bo_is_vmw_bo(struct ttm_buffer_object *bo)
> -{
> -       if (bo->destroy == &vmw_bo_bo_free ||
> -           bo->destroy == &vmw_gem_destroy)
> -               return true;
> -
> -       return false;
> -}
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> index 72a17618ba0a..0c12faa4e533 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
> @@ -1018,13 +1018,10 @@ static int vmw_driver_load(struct vmw_private
> *dev_priv, u32 pci_id)
>                  goto out_no_fman;
>          }
>
> -       drm_vma_offset_manager_init(&dev_priv->vma_manager,
> -                                   DRM_FILE_PAGE_OFFSET_START,
> -                                   DRM_FILE_PAGE_OFFSET_SIZE);
>          ret = ttm_device_init(&dev_priv->bdev, &vmw_bo_driver,
>                                dev_priv->drm.dev,
>                                dev_priv->drm.anon_inode->i_mapping,
> -                             &dev_priv->vma_manager,
> +                             dev_priv->drm.vma_offset_manager,
>                                dev_priv->map_mode == vmw_dma_alloc_coherent,
>                                false);
>          if (unlikely(ret != 0)) {
> @@ -1195,7 +1192,6 @@ static void vmw_driver_unload(struct drm_device *dev)
>          vmw_devcaps_destroy(dev_priv);
>          vmw_vram_manager_fini(dev_priv);
>          ttm_device_fini(&dev_priv->bdev);
> -       drm_vma_offset_manager_destroy(&dev_priv->vma_manager);
>          vmw_release_device_late(dev_priv);
>          vmw_fence_manager_takedown(dev_priv->fman);
>          if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
> @@ -1419,7 +1415,7 @@ vmw_get_unmapped_area(struct file *file,
> unsigned long uaddr,
>          struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
>
>          return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
> -                                    &dev_priv->vma_manager);
> +                                    dev_priv->drm.vma_offset_manager);
>   }
>
>   static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> index 00e8e27e4884..ace7ca150b03 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
> @@ -683,6 +683,9 @@ static void vmw_user_surface_base_release(struct
> ttm_base_object **p_base)
>              container_of(base, struct vmw_user_surface, prime.base);
>          struct vmw_resource *res = &user_srf->srf.res;
>
> +       if (base->shareable && res && res->backup)
> +               drm_gem_object_put(&res->backup->base.base);
> +
>          *p_base = NULL;
>          vmw_resource_unreference(&res);
>   }
> @@ -857,6 +860,7 @@ int vmw_surface_define_ioctl(struct drm_device
> *dev, void *data,
>                          goto out_unlock;
>                  }
>                  vmw_bo_reference(res->backup);
> +               drm_gem_object_get(&res->backup->base.base);
>          }
>
>          tmp = vmw_resource_reference(&srf->res);
> @@ -1513,7 +1517,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
>                                                          &res->backup);
>                  if (ret == 0)
>                          vmw_bo_reference(res->backup);
> -
>          }
>
>          if (unlikely(ret != 0)) {
> @@ -1561,6 +1564,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
>
> drm_vma_node_offset_addr(&res->backup->base.base.vma_node);
>                  rep->buffer_size = res->backup->base.base.size;
>                  rep->buffer_handle = backup_handle;
> +               if (user_srf->prime.base.shareable)
> +                       drm_gem_object_get(&res->backup->base.base);
>          } else {
>                  rep->buffer_map_handle = 0;
>                  rep->buffer_size = 0;
> --
> 2.32.0


LGTM!


Reviewed-by: Martin Krastev <krastevm@vmware.com>


Regards,

Martin


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-04-20 18:46 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-20  4:03 [PATCH v2] drm/vmwgfx: Fix gem refcounting and memory evictions Zack Rusin
     [not found] ` <CAKPuMhMdrDK7K6c3C_8CaOB++uQJun_RiHuSHtRYa8eJNPJLUg@mail.gmail.com>
2022-04-20 18:46   ` Martin Krastev (VMware)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.