All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/ttm: remove need_dma32 flag
@ 2020-10-02 11:31 Christian König
  2020-10-02 11:31 ` [PATCH 2/2] drm/ttm: remove no_retry flag Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2020-10-02 11:31 UTC (permalink / raw)
  To: dri-devel, ray.huang, airlied, daniel

Drivers can just set the DMA32 flag in their TT creation function.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++--
 drivers/gpu/drm/drm_gem_vram_helper.c   | 4 ++--
 drivers/gpu/drm/nouveau/nouveau_bo.c    | 6 +++++-
 drivers/gpu/drm/nouveau/nouveau_ttm.c   | 3 +--
 drivers/gpu/drm/qxl/qxl_ttm.c           | 3 +--
 drivers/gpu/drm/radeon/radeon_ttm.c     | 7 +++++--
 drivers/gpu/drm/ttm/ttm_device.c        | 4 +---
 drivers/gpu/drm/ttm/ttm_tt.c            | 3 ---
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     | 3 +--
 include/drm/ttm/ttm_device.h            | 4 +---
 10 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 088dcfce6bca..c5f2b4971ef7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1287,6 +1287,7 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
 static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 					   uint32_t page_flags)
 {
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_ttm_tt *gtt;
 	enum ttm_caching caching;
@@ -1297,6 +1298,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 	}
 	gtt->gobj = &bo->base;
 
+	if (dma_addressing_limited(adev->dev))
+		page_flags |= TTM_PAGE_FLAG_DMA32;
+
 	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 		caching = ttm_write_combined;
 	else
@@ -1884,8 +1888,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	r = ttm_device_init(&adev->mman.bdev,
 			       &amdgpu_bo_driver,
 			       adev_to_drm(adev)->anon_inode->i_mapping,
-			       adev_to_drm(adev)->vma_offset_manager,
-			       dma_addressing_limited(adev->dev));
+			       adev_to_drm(adev)->vma_offset_manager);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index f7a027123975..4ba9f19dac90 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -907,6 +907,7 @@ static struct ttm_tt *bo_driver_ttm_tt_create(struct ttm_buffer_object *bo,
 	if (!tt)
 		return NULL;
 
+	page_flags |= TTM_PAGE_FLAG_DMA32;
 	ret = ttm_tt_init(tt, bo, page_flags, ttm_cached);
 	if (ret < 0)
 		goto err_ttm_tt_init;
@@ -1019,8 +1020,7 @@ static int drm_vram_mm_init(struct drm_vram_mm *vmm, struct drm_device *dev,
 
 	ret = ttm_device_init(&vmm->bdev, &bo_driver,
 				 dev->anon_inode->i_mapping,
-				 dev->vma_offset_manager,
-				 true);
+				 dev->vma_offset_manager);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 912bc3204c4e..bf13152d3288 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -662,9 +662,13 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 static struct ttm_tt *
 nouveau_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags)
 {
-#if IS_ENABLED(CONFIG_AGP)
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 
+	if (drm->client.mmu.dmabits <= 32)
+		page_flags |= TTM_PAGE_FLAG_DMA32;
+
+#if IS_ENABLED(CONFIG_AGP)
+
 	if (drm->agp.bridge) {
 		return ttm_agp_tt_create(bo, drm->agp.bridge, page_flags);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 2fb127da0208..55f14c6a4dd5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -318,8 +318,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	ret = ttm_device_init(&drm->ttm.bdev,
 				  &nouveau_bo_driver,
 				  dev->anon_inode->i_mapping,
-				  dev->vma_offset_manager,
-				  drm->client.mmu.dmabits <= 32 ? true : false);
+				  dev->vma_offset_manager);
 	if (ret) {
 		NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 9b707af7dfd0..22019141a09a 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -217,8 +217,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
 	r = ttm_device_init(&qdev->mman.bdev,
 			       &qxl_bo_driver,
 			       qdev->ddev.anon_inode->i_mapping,
-			       qdev->ddev.vma_offset_manager,
-			       false);
+			       qdev->ddev.vma_offset_manager);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 3835c9457c06..f8e0bf75cd7d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -588,6 +588,10 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
 	rbo = container_of(bo, struct radeon_bo, tbo);
 
 	rdev = radeon_get_rdev(bo->bdev);
+
+	if (dma_addressing_limited(&rdev->pdev->dev))
+		page_flags |= TTM_PAGE_FLAG_DMA32;
+
 #if IS_ENABLED(CONFIG_AGP)
 	if (rdev->flags & RADEON_IS_AGP) {
 		return ttm_agp_tt_create(bo, rdev->ddev->agp->bridge,
@@ -818,8 +822,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	r = ttm_device_init(&rdev->mman.bdev,
 			       &radeon_bo_driver,
 			       rdev->ddev->anon_inode->i_mapping,
-			       rdev->ddev->vma_offset_manager,
-			       dma_addressing_limited(&rdev->pdev->dev));
+			       rdev->ddev->vma_offset_manager);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index fe2e4052afcb..f4ecb6f217ab 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -158,8 +158,7 @@ static void ttm_device_delayed_workqueue(struct work_struct *work)
 
 int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 		    struct address_space *mapping,
-		    struct drm_vma_offset_manager *vma_manager,
-		    bool need_dma32)
+		    struct drm_vma_offset_manager *vma_manager)
 {
 	struct ttm_global *glob = &ttm_glob;
 	int ret;
@@ -179,7 +178,6 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = mapping;
-	bdev->need_dma32 = need_dma32;
 	mutex_lock(&ttm_global_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
 	mutex_unlock(&ttm_global_mutex);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 95dffee8299d..e2b1e6c53a04 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -52,9 +52,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
 	if (bo->ttm)
 		return 0;
 
-	if (bdev->need_dma32)
-		page_flags |= TTM_PAGE_FLAG_DMA32;
-
 	if (bdev->no_retry)
 		page_flags |= TTM_PAGE_FLAG_NO_RETRY;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index dcbcb04ff27c..6c9d67b080ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -875,8 +875,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	ret = ttm_device_init(&dev_priv->bdev,
 				 &vmw_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 &dev_priv->vma_manager,
-				 false);
+				 &dev_priv->vma_manager);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
 		goto out_no_bdev;
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index 13840697d3ba..bfc6dd87f2d3 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -327,7 +327,6 @@ struct ttm_device {
 
 	struct delayed_work wq;
 
-	bool need_dma32;
 	bool no_retry;
 };
 
@@ -359,8 +358,7 @@ ttm_set_driver_manager(struct ttm_device *bdev, int type,
  */
 int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
 		    struct address_space *mapping,
-		    struct drm_vma_offset_manager *vma_manager,
-		    bool need_dma32);
+		    struct drm_vma_offset_manager *vma_manager);
 int ttm_device_release(struct ttm_device *bdev);
 
 /**
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] drm/ttm: remove no_retry flag
  2020-10-02 11:31 [PATCH 1/2] drm/ttm: remove need_dma32 flag Christian König
@ 2020-10-02 11:31 ` Christian König
  2020-10-02 12:31   ` Daniel Vetter
  0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2020-10-02 11:31 UTC (permalink / raw)
  To: dri-devel, ray.huang, airlied, daniel

Amdgpu was the only user of this.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++---
 drivers/gpu/drm/ttm/ttm_tt.c            | 3 ---
 include/drm/ttm/ttm_device.h            | 2 --
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5f2b4971ef7..0a4233985870 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1298,6 +1298,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 	}
 	gtt->gobj = &bo->base;
 
+	/* We opt to avoid OOM on system pages allocations */
+	page_flags |= TTM_PAGE_FLAG_NO_RETRY;
+
 	if (dma_addressing_limited(adev->dev))
 		page_flags |= TTM_PAGE_FLAG_DMA32;
 
@@ -1895,9 +1898,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 	adev->mman.initialized = true;
 
-	/* We opt to avoid OOM on system pages allocations */
-	adev->mman.bdev.no_retry = true;
-
 	/* Initialize VRAM pool with all of VRAM divided into pages */
 	r = amdgpu_vram_mgr_init(adev);
 	if (r) {
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index e2b1e6c53a04..98514abaa939 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -52,9 +52,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
 	if (bo->ttm)
 		return 0;
 
-	if (bdev->no_retry)
-		page_flags |= TTM_PAGE_FLAG_NO_RETRY;
-
 	switch (bo->type) {
 	case ttm_bo_type_device:
 		if (zero_alloc)
diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
index bfc6dd87f2d3..e0eba36c1309 100644
--- a/include/drm/ttm/ttm_device.h
+++ b/include/drm/ttm/ttm_device.h
@@ -326,8 +326,6 @@ struct ttm_device {
 	 */
 
 	struct delayed_work wq;
-
-	bool no_retry;
 };
 
 static inline struct ttm_resource_manager *
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] drm/ttm: remove no_retry flag
  2020-10-02 11:31 ` [PATCH 2/2] drm/ttm: remove no_retry flag Christian König
@ 2020-10-02 12:31   ` Daniel Vetter
  2020-10-05 14:37     ` Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: Daniel Vetter @ 2020-10-02 12:31 UTC (permalink / raw)
  To: Christian König; +Cc: Huang Rui, dri-devel

On Fri, Oct 2, 2020 at 1:31 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Amdgpu was the only user of this.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Uh this smells like a fishy band-aid. And the original commit
introducing this also doesn't shed any light on why this should
happen, and why it's specific to the amdgpu driver. Do you have some
more memories here?

I guess no retry makes sense for a "do you still have memory?" query,
but once we've committed to having that memory, I'm not seeing why we
should not try to find it? Might also tie into the lack of active
shrinking for ttm objects in the system domain.
-Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++---
>  drivers/gpu/drm/ttm/ttm_tt.c            | 3 ---
>  include/drm/ttm/ttm_device.h            | 2 --
>  3 files changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c5f2b4971ef7..0a4233985870 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1298,6 +1298,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
>         }
>         gtt->gobj = &bo->base;
>
> +       /* We opt to avoid OOM on system pages allocations */
> +       page_flags |= TTM_PAGE_FLAG_NO_RETRY;
> +
>         if (dma_addressing_limited(adev->dev))
>                 page_flags |= TTM_PAGE_FLAG_DMA32;
>
> @@ -1895,9 +1898,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>         }
>         adev->mman.initialized = true;
>
> -       /* We opt to avoid OOM on system pages allocations */
> -       adev->mman.bdev.no_retry = true;
> -
>         /* Initialize VRAM pool with all of VRAM divided into pages */
>         r = amdgpu_vram_mgr_init(adev);
>         if (r) {
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index e2b1e6c53a04..98514abaa939 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -52,9 +52,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
>         if (bo->ttm)
>                 return 0;
>
> -       if (bdev->no_retry)
> -               page_flags |= TTM_PAGE_FLAG_NO_RETRY;
> -
>         switch (bo->type) {
>         case ttm_bo_type_device:
>                 if (zero_alloc)
> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> index bfc6dd87f2d3..e0eba36c1309 100644
> --- a/include/drm/ttm/ttm_device.h
> +++ b/include/drm/ttm/ttm_device.h
> @@ -326,8 +326,6 @@ struct ttm_device {
>          */
>
>         struct delayed_work wq;
> -
> -       bool no_retry;
>  };
>
>  static inline struct ttm_resource_manager *
> --
> 2.17.1
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] drm/ttm: remove no_retry flag
  2020-10-02 12:31   ` Daniel Vetter
@ 2020-10-05 14:37     ` Christian König
  2020-10-05 14:55       ` Daniel Vetter
  0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2020-10-05 14:37 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Huang Rui, dri-devel

Am 02.10.20 um 14:31 schrieb Daniel Vetter:
> On Fri, Oct 2, 2020 at 1:31 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Amdgpu was the only user of this.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Uh this smells like a fishy band-aid. And the original commit
> introducing this also doesn't shed any light on why this should
> happen, and why it's specific to the amdgpu driver. Do you have some
> more memories here?

Nope, I only vaguely remember that we had a customer who ran into the OOM
killer and instead wanted to get -ENOMEM.

But I honestly don't remember why we approached it like that.

Christian.

>
> I guess no retry makes sense for a "do you still have memory?" query,
> but once we've committed to having that memory, I'm not seeing why we
> should not try to find it? Might also tie into the lack of active
> shrinking for ttm objects in the system domain.
> -Daniel
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++---
>>   drivers/gpu/drm/ttm/ttm_tt.c            | 3 ---
>>   include/drm/ttm/ttm_device.h            | 2 --
>>   3 files changed, 3 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c5f2b4971ef7..0a4233985870 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -1298,6 +1298,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
>>          }
>>          gtt->gobj = &bo->base;
>>
>> +       /* We opt to avoid OOM on system pages allocations */
>> +       page_flags |= TTM_PAGE_FLAG_NO_RETRY;
>> +
>>          if (dma_addressing_limited(adev->dev))
>>                  page_flags |= TTM_PAGE_FLAG_DMA32;
>>
>> @@ -1895,9 +1898,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>>          }
>>          adev->mman.initialized = true;
>>
>> -       /* We opt to avoid OOM on system pages allocations */
>> -       adev->mman.bdev.no_retry = true;
>> -
>>          /* Initialize VRAM pool with all of VRAM divided into pages */
>>          r = amdgpu_vram_mgr_init(adev);
>>          if (r) {
>> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
>> index e2b1e6c53a04..98514abaa939 100644
>> --- a/drivers/gpu/drm/ttm/ttm_tt.c
>> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
>> @@ -52,9 +52,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
>>          if (bo->ttm)
>>                  return 0;
>>
>> -       if (bdev->no_retry)
>> -               page_flags |= TTM_PAGE_FLAG_NO_RETRY;
>> -
>>          switch (bo->type) {
>>          case ttm_bo_type_device:
>>                  if (zero_alloc)
>> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
>> index bfc6dd87f2d3..e0eba36c1309 100644
>> --- a/include/drm/ttm/ttm_device.h
>> +++ b/include/drm/ttm/ttm_device.h
>> @@ -326,8 +326,6 @@ struct ttm_device {
>>           */
>>
>>          struct delayed_work wq;
>> -
>> -       bool no_retry;
>>   };
>>
>>   static inline struct ttm_resource_manager *
>> --
>> 2.17.1
>>
>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] drm/ttm: remove no_retry flag
  2020-10-05 14:37     ` Christian König
@ 2020-10-05 14:55       ` Daniel Vetter
  0 siblings, 0 replies; 5+ messages in thread
From: Daniel Vetter @ 2020-10-05 14:55 UTC (permalink / raw)
  To: Christian König; +Cc: Huang Rui, dri-devel

On Mon, Oct 5, 2020 at 4:37 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 02.10.20 um 14:31 schrieb Daniel Vetter:
> > On Fri, Oct 2, 2020 at 1:31 PM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> >> Amdgpu was the only user of this.
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Uh this smells like a fishy band-aid. And the original commit
> > > introducing this also doesn't shed any light on why this should
> > > happen, and why it's specific to the amdgpu driver. Do you have some
> > > more memories here?
>
> > Nope, I only vaguely remember that we had a customer who ran into the OOM
> > killer and instead wanted to get -ENOMEM.
> >
> > But I honestly don't remember why we approached it like that.

Well, the oom killer being supremely unpopular is kinda not news. I think
what you want is that in the buffer create ioctl you don't retry, but
instead fall over if there's no memory, so that userspace knows it
can't allocate more gpu memory.

But in execbuf, not trying to find the memory we promised is
there is kinda rude. So I think this should be a runtime flag, perhaps
in the ttm_operation_ctx?

The other thing which is really nasty is if we add a shrinker for
SYSTEM objects (using trylocks and all that), and maybe throw out the
swapped shrinker completely and only rely on that first one. Since
when that happens you do want to shrink excessive drag, but not too
much (but I think that should still be covered by the NO_RETRY flag,
iirc that means "shrink a bit, but don't get desperate"). But that's
kind of a bigger discussion.
-Daniel

>
> Christian.
>
> >
> > > I guess no retry makes sense for a "do you still have memory?" query,
> > > but once we've committed to having that memory, I'm not seeing why we
> > > should not try to find it? Might also tie into the lack of active
> > > shrinking for ttm objects in the system domain.
> > -Daniel
> >
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++---
> >>   drivers/gpu/drm/ttm/ttm_tt.c            | 3 ---
> >>   include/drm/ttm/ttm_device.h            | 2 --
> >>   3 files changed, 3 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index c5f2b4971ef7..0a4233985870 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -1298,6 +1298,9 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
> >>          }
> >>          gtt->gobj = &bo->base;
> >>
> >> +       /* We opt to avoid OOM on system pages allocations */
> >> +       page_flags |= TTM_PAGE_FLAG_NO_RETRY;
> >> +
> >>          if (dma_addressing_limited(adev->dev))
> >>                  page_flags |= TTM_PAGE_FLAG_DMA32;
> >>
> >> @@ -1895,9 +1898,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
> >>          }
> >>          adev->mman.initialized = true;
> >>
> >> -       /* We opt to avoid OOM on system pages allocations */
> >> -       adev->mman.bdev.no_retry = true;
> >> -
> >>          /* Initialize VRAM pool with all of VRAM divided into pages */
> >>          r = amdgpu_vram_mgr_init(adev);
> >>          if (r) {
> >> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> >> index e2b1e6c53a04..98514abaa939 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> >> @@ -52,9 +52,6 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
> >>          if (bo->ttm)
> >>                  return 0;
> >>
> >> -       if (bdev->no_retry)
> >> -               page_flags |= TTM_PAGE_FLAG_NO_RETRY;
> >> -
> >>          switch (bo->type) {
> >>          case ttm_bo_type_device:
> >>                  if (zero_alloc)
> >> diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h
> >> index bfc6dd87f2d3..e0eba36c1309 100644
> >> --- a/include/drm/ttm/ttm_device.h
> >> +++ b/include/drm/ttm/ttm_device.h
> >> @@ -326,8 +326,6 @@ struct ttm_device {
> >>           */
> >>
> >>          struct delayed_work wq;
> >> -
> >> -       bool no_retry;
> >>   };
> >>
> >>   static inline struct ttm_resource_manager *
> >> --
> >> 2.17.1
> >>
> >
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-10-05 14:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-02 11:31 [PATCH 1/2] drm/ttm: remove need_dma32 flag Christian König
2020-10-02 11:31 ` [PATCH 2/2] drm/ttm: remove no_retry flag Christian König
2020-10-02 12:31   ` Daniel Vetter
2020-10-05 14:37     ` Christian König
2020-10-05 14:55       ` Daniel Vetter

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.