From db0be21d83078b2fe4cc6e9115d0b63a72a7e505 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Tue, 17 Mar 2015 11:13:03 -0400
Subject: [PATCH 10/10] drm/radeon: fix TOPDOWN handling for bo_create (v4)

radeon_bo_create() calls radeon_ttm_placement_from_domain()
before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
uses the ttm bo size to determine when to select top down
allocation, but since the ttm bo is not initialized yet the
check is always false.

v2: only use topdown for vram if the user has not requested
CPU access explicitly.  Fixes VCE.

v3: explicitly set CPU access on kernel allocations where we
expect allocations to be at the start of vram to avoid
fragmentation and extra migration.

v4: drop v2/v3 changes, rebase on top of pfn changes

Noticed-by: Oded Gabbay
Signed-off-by: Alex Deucher
---
A minimal standalone sketch of the ordering issue follows the diff below.

 drivers/gpu/drm/radeon/radeon.h        |  2 +-
 drivers/gpu/drm/radeon/radeon_gem.c    |  3 ++-
 drivers/gpu/drm/radeon/radeon_mn.c     |  3 ++-
 drivers/gpu/drm/radeon/radeon_object.c | 19 +++++++++++--------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
 5 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7de3e21..809fc49 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2981,7 +2981,7 @@ extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
-                                             unsigned fpfn, unsigned lpfn);
+                                             u64 size, unsigned fpfn, unsigned lpfn);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
                                      uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 0175296..3e785647 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 		goto release_object;
 	}
 
-	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT,
+					 bo->tbo.mem.size, 0, 0);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 	radeon_bo_unreserve(bo);
 	up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index 359f1f2..0def222 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,7 +141,8 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 			DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, 0, 0);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU,
+						 bo->tbo.mem.size, 0, 0);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 205dfb8..44b594f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -94,7 +94,7 @@ bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 }
 
 void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
-				      unsigned fpfn, unsigned lpfn)
+				      u64 size, unsigned fpfn, unsigned lpfn)
 {
 	u32 c = 0, i;
 
@@ -181,9 +181,10 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
-			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
+			if (rbo->placements[i].lpfn == 0)
+				rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
 	}
 }
@@ -254,7 +255,7 @@ int radeon_bo_create(struct radeon_device *rdev,
 		bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain, fpfn, lpfn);
+	radeon_ttm_placement_from_domain(bo, domain, size, fpfn, lpfn);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -360,7 +361,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 			lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
 		else
 			lpfn = max_offset >> PAGE_SHIFT;
-		radeon_ttm_placement_from_domain(bo, domain, 0, lpfn);
+		radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size, 0, lpfn);
 
 		for (i = 0; i < bo->placement.num_placement; i++)
 			bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
@@ -558,11 +559,11 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 
 retry:
 			if (ring == R600_RING_TYPE_UVD_INDEX) {
-				radeon_ttm_placement_from_domain(bo, domain,
+				radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size,
 								 0, (256 * 1024 * 1024) >> PAGE_SHIFT);
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 			} else {
-				radeon_ttm_placement_from_domain(bo, domain, 0, 0);
+				radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size, 0, 0);
 			}
 
 			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
@@ -805,10 +806,12 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 	/* hurrah the memory is not visible ! */
 	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size,
 					 0, rdev->mc.visible_vram_size >> PAGE_SHIFT);
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size, 0, 0);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 49d00d8..befb590 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, 0, 0);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size, 0, 0);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT, 0, 0);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size, 0, 0);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size, 0, 0);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU, 0, 0);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size, 0, 0);
 	}
 	*placement = rbo->placement;
 }
-- 
1.8.3.1
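
The following is a minimal, standalone sketch of the ordering issue fixed
above.  It is not driver code: the fake_* structs and the
placement_from_domain_old/new helpers are invented stand-ins for
radeon_bo/ttm_buffer_object and radeon_ttm_placement_from_domain(),
compiled as an ordinary userspace program.

/*
 * Sketch of the bug: placement is computed before "ttm_bo_init" has
 * filled in the bo size, so a size check against the embedded ttm bo
 * always sees 0.  All names here are invented stand-ins.
 */
#include <stdio.h>
#include <stdint.h>

#define TTM_PL_FLAG_TOPDOWN (1u << 0)

struct fake_ttm_bo {
	uint64_t size;			/* stays 0 until "ttm_bo_init" runs */
};

struct fake_radeon_bo {
	struct fake_ttm_bo tbo;
	unsigned int placement_flags;
};

/* Old behavior: reads the not-yet-initialized ttm bo size. */
static void placement_from_domain_old(struct fake_radeon_bo *rbo)
{
	if (rbo->tbo.size > 512 * 1024)	/* always false here: size is 0 */
		rbo->placement_flags |= TTM_PL_FLAG_TOPDOWN;
}

/* Fixed behavior: the caller passes the size it already knows. */
static void placement_from_domain_new(struct fake_radeon_bo *rbo,
				      uint64_t size)
{
	if (size > 512 * 1024)		/* sees the real request size */
		rbo->placement_flags |= TTM_PL_FLAG_TOPDOWN;
}

int main(void)
{
	struct fake_radeon_bo bo = { { 0 }, 0 };
	uint64_t size = 1024 * 1024;	/* 1 MiB request, above the 512kb cutoff */

	/* bo_create computes placement first... */
	placement_from_domain_old(&bo);
	printf("old: TOPDOWN set? %d\n",
	       !!(bo.placement_flags & TTM_PL_FLAG_TOPDOWN));	/* prints 0 */

	bo.placement_flags = 0;
	placement_from_domain_new(&bo, size);
	printf("new: TOPDOWN set? %d\n",
	       !!(bo.placement_flags & TTM_PL_FLAG_TOPDOWN));	/* prints 1 */

	/* ...and only afterwards would "ttm_bo_init" set bo.tbo.size. */
	return 0;
}

Note that the second half of the v4 change is visible in the first
radeon_object.c hunk: TOPDOWN is now set only for placements whose lpfn is
unrestricted (zero), so range-limited placements keep bottom-up allocation.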