Hi, I hope the following nitpicks are helpful. Kind Regards, Edward. On 08/29/2016 07:20 PM, Christian König wrote: > From: Christian König > > Split VRAM allocations into 4MB blocks. > > Signed-off-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 + > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 + > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 + > drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 220 +++++++++++++++++++++++++++ > 9 files changed, 239 insertions(+), 2 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile > index c8d4d7c..ba12d3c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/Makefile > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile > @@ -29,7 +29,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ > amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ > atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ > amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ > - amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o > + amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ > + amdgpu_vram_mgr.o > > # add asic specific block > amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index b2d95a9..7e3d341 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -96,6 +96,7 @@ extern unsigned amdgpu_cg_mask; > extern unsigned amdgpu_pg_mask; > extern int amdgpu_sclk_deep_sleep_en; > extern char *amdgpu_virtual_display; > +extern int amdgpu_vram_page_split; > > #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 > #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 38f5315..b2dcc11 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -1004,6 +1004,12 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) > amdgpu_vm_block_size); > amdgpu_vm_block_size = 9; > } > + > + if (amdgpu_vram_page_split != -1 && amdgpu_vram_page_split < 16) { > + dev_warn(adev->dev, "invalid VRAM page split (%d)\n", > + amdgpu_vram_page_split); > + amdgpu_vram_page_split = 1024; > + } > } > > /** > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 56c85e6..dcef0b4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -82,6 +82,7 @@ int amdgpu_vm_size = 64; > int amdgpu_vm_block_size = -1; > int amdgpu_vm_fault_stop = 0; > int amdgpu_vm_debug = 0; > +int amdgpu_vram_page_split = 1024; > int amdgpu_exp_hw_support = 0; > int amdgpu_dal = -1; > int amdgpu_sched_jobs = 32; > @@ -161,6 +162,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); > MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); > module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); > > +MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 1024)"); > +module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); > + > MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); > module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > index 79f6413..d2dab28 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c > @@ -933,6 +933,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) > !bo->pin_count); > WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && > !(bo->flags & AMDGPU_GEM_CREATE_VRAM_LINEAR)); > + WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); > > return bo->tbo.offset; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > index b6a2739..e498465 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h > @@ -31,6 +31,8 @@ > #include > #include "amdgpu.h" > > +#define AMDGPU_BO_INVALID_OFFSET LONG_MAX > + > /** > * amdgpu_mem_type_to_domain - return domain corresponding to mem_type > * @mem_type: ttm memory type > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index db8638b..9990957 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -164,7 +164,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, > break; > case TTM_PL_VRAM: > /* "On-card" video ram */ > - man->func = &ttm_bo_manager_func; > + man->func = &amdgpu_vram_mgr_func; > man->gpu_offset = adev->mc.vram_start; > man->flags = TTM_MEMTYPE_FLAG_FIXED | > TTM_MEMTYPE_FLAG_MAPPABLE; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index 72f6bfc..365ce9e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -65,6 +65,8 @@ struct amdgpu_mman { > struct amdgpu_mman_lru guard; > }; > > +extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; > + > int amdgpu_copy_buffer(struct amdgpu_ring *ring, > uint64_t src_offset, > uint64_t dst_offset, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > new file mode 100644 > index 0000000..6c5f3ba > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c > @@ -0,0 +1,220 @@ > +/* > + * Copyright 2015 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + * > + * Authors: Christian König > + */ > + > +#include > +#include "amdgpu.h" > + > +struct amdgpu_vram_mgr { > + struct drm_mm mm; > + spinlock_t lock; > +}; > + > +/** > + * amdgpu_vram_mgr_init - init VRAM manager and DRM MM > + * > + * @man: TTM memory type manager > + * @p_size: maximum size of VRAM > + * > + * Allocate and initialize the VRAM manager. > + */ > +static int amdgpu_vram_mgr_init(struct ttm_mem_type_manager *man, > + unsigned long p_size) > +{ > + struct amdgpu_vram_mgr *mgr; > + > + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); > + if (!mgr) > + return -ENOMEM; > + > + drm_mm_init(&mgr->mm, 0, p_size); > + spin_lock_init(&mgr->lock); > + man->priv = mgr; > + return 0; > +} > + > +/** > + * amdgpu_vram_mgr_fini - free and destroy VRAM manager > + * > + * @man: TTM memory type manager > + * > + * Destory and free the VRAM manager, returns -EBUSY if ranges are still > + * allocated inside it. > + */ > +static int amdgpu_vram_mgr_fini(struct ttm_mem_type_manager *man) > +{ > + struct amdgpu_vram_mgr *mgr = man->priv; > + > + spin_lock(&mgr->lock); > + if (drm_mm_clean(&mgr->mm)) { > + drm_mm_takedown(&mgr->mm); > + spin_unlock(&mgr->lock); > + kfree(mgr); > + man->priv = NULL; > + return 0; > + } > + spin_unlock(&mgr->lock); > + return -EBUSY; What about taking the bulk of this out the true branch into the body of the function as in the following: if (!drm_mm_clean(&mgr->mm)) { spin_unlock(&mgr->lock); return -EBUSY; } drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); kfree(mgr); man->priv = NULL; return 0; > +} > + > +/** > + * amdgpu_vram_mgr_new - allocate new ranges > + * > + * @man: TTM memory type manager > + * @tbo: TTM BO we need this range for > + * @place: placement flags and restrictions > + * @mem: the resulting mem object > + * > + * Allocate VRAM for the given BO. > + */ > +static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, > + struct ttm_buffer_object *tbo, > + const struct ttm_place *place, > + struct ttm_mem_reg *mem) > +{ > + struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo); > + struct amdgpu_vram_mgr *mgr = man->priv; > + struct drm_mm *mm = &mgr->mm; > + struct drm_mm_node *nodes; > + enum drm_mm_search_flags sflags = DRM_MM_SEARCH_DEFAULT; > + enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT; > + unsigned long lpfn, num_nodes, pages_per_node, pages_left; > + unsigned i; > + int r; > + > + lpfn = place->lpfn; > + if (!lpfn) > + lpfn = man->size; > + > + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_LINEAR || > + amdgpu_vram_page_split == -1) { > + pages_per_node = ~0ul; > + num_nodes = 1; > + } else { > + pages_per_node = max((uint32_t)amdgpu_vram_page_split, > + mem->page_alignment); > + num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); > + } > + > + nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL); > + if (!nodes) > + return -ENOMEM; > + > + if (place->flags & TTM_PL_FLAG_TOPDOWN) { > + sflags = DRM_MM_SEARCH_BELOW; > + aflags = DRM_MM_CREATE_TOP; > + } > + > + pages_left = mem->num_pages; > + > + spin_lock(&mgr->lock); > + for (i = 0; i < num_nodes; ++i) { > + unsigned long pages = min(pages_left, pages_per_node); > + uint32_t alignment = mem->page_alignment; > + > + if (pages == pages_per_node) > + alignment = pages_per_node; > + else > + sflags |= DRM_MM_SEARCH_BEST; > + > + r = drm_mm_insert_node_in_range_generic(mm, &nodes[i], pages, > + alignment, 0, > + place->fpfn, lpfn, > + sflags, aflags); > + if (unlikely(r)) > + goto error; > + > + pages_left -= pages; > + } > + spin_unlock(&mgr->lock); > + > + mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET; > + mem->mm_node = nodes; > + > + return 0; > + > +error: > + while (i--) Perhaps make this more explicit of intent, I feel i being negative is a bug waiting to happen? > + drm_mm_remove_node(&nodes[i]); Could be wrong but do we miss one nodes[] here? > + spin_unlock(&mgr->lock); > + > + kfree(nodes); > + return r == -ENOSPC ? 0 : r; > +} > + > +/** > + * amdgpu_vram_mgr_del - free ranges > + * > + * @man: TTM memory type manager > + * @tbo: TTM BO we need this range for > + * @place: placement flags and restrictions > + * @mem: TTM memory object > + * > + * Free the allocated VRAM again. > + */ > +static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, > + struct ttm_mem_reg *mem) > +{ > + struct amdgpu_vram_mgr *mgr = man->priv; > + struct drm_mm_node *nodes = mem->mm_node; > + unsigned i, pages = mem->num_pages; > + > + if (!mem->mm_node) > + return; > + > + spin_lock(&mgr->lock); > + for (i = 0; pages; ++i) { More explicit on the intent of the pages predicate. > + pages -= nodes[i].size; > + drm_mm_remove_node(&nodes[i]); > + } > + spin_unlock(&mgr->lock); > + > + kfree(mem->mm_node); > + mem->mm_node = NULL; > +} > + > +/** > + * amdgpu_vram_mgr_debug - dump VRAM table > + * > + * @man: TTM memory type manager > + * @prefix: text prefix > + * > + * Dump the table content using printk. > + */ > +static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, > + const char *prefix) > +{ > + struct amdgpu_vram_mgr *mgr = man->priv; > + > + spin_lock(&mgr->lock); > + drm_mm_debug_table(&mgr->mm, prefix); > + spin_unlock(&mgr->lock); > +} > + > +const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { > + amdgpu_vram_mgr_init, > + amdgpu_vram_mgr_fini, > + amdgpu_vram_mgr_new, > + amdgpu_vram_mgr_del, > + amdgpu_vram_mgr_debug > +}; >