* [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit @ 2017-10-06 22:08 Harish Kasiviswanathan [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Harish Kasiviswanathan @ 2017-10-06 22:08 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports arbitrary copy size, offsets and two BOs (source & dest.). This is useful for KFD Cross Memory Attach feature where data needs to be copied from BOs from different processes v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem() function parameters to use the struct Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159 Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++ 2 files changed, 132 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1086f03..dda4f08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, return addr; } -static int amdgpu_move_blit(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, - struct ttm_mem_reg *new_mem, - struct ttm_mem_reg *old_mem) +/** + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * + * The function copies @size bytes from {src->mem + src->offset} to + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a + * move and different for a BO to BO copy. + * + * @f: Returns the last fence if multiple jobs are submitted. + */ +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - - struct drm_mm_node *old_mm, *new_mm; - uint64_t old_start, old_size, new_start, new_size; - unsigned long num_pages; + struct drm_mm_node *src_mm, *dst_mm; + uint64_t src_node_start, dst_node_start, src_node_size, + dst_node_size, src_page_offset, dst_page_offset; struct dma_fence *fence = NULL; - int r; - - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); + int r = 0; + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); if (!ring->ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - old_mm = old_mem->mm_node; - old_size = old_mm->size; - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); + src_mm = src->mem->mm_node; + while (src->offset >= (src_mm->size << PAGE_SHIFT)) { + src->offset -= (src_mm->size << PAGE_SHIFT); + ++src_mm; + } + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + + src->offset; + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; + src_page_offset = src_node_start & (PAGE_SIZE - 1); - new_mm = new_mem->mm_node; - new_size = new_mm->size; - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); + dst_mm = dst->mem->mm_node; + while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { + dst->offset -= (dst_mm->size << PAGE_SHIFT); + ++dst_mm; + } + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + + dst->offset; + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); - num_pages = new_mem->num_pages; mutex_lock(&adev->mman.gtt_window_lock); - while (num_pages) { - unsigned long cur_pages = min(min(old_size, new_size), - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); - uint64_t from = old_start, to = new_start; + + while (size) { + unsigned long cur_size; + uint64_t from = src_node_start, to = dst_node_start; struct dma_fence *next; - if (old_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(old_mem)) { - r = amdgpu_map_buffer(bo, old_mem, cur_pages, - old_start, 0, ring, &from); + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst + * begins at an offset, then adjust the size accordingly + */ + cur_size = min3(min(src_node_size, dst_node_size), size, + GTT_MAX_BYTES); + if (cur_size + src_page_offset > GTT_MAX_BYTES || + cur_size + dst_page_offset > GTT_MAX_BYTES) + cur_size -= max(src_page_offset, dst_page_offset); + + /* Map only what needs to be accessed. Map src to window 0 and + * dst to window 1 + */ + if (src->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(src->mem)) { + r = amdgpu_map_buffer(src->bo, src->mem, + PFN_UP(cur_size + src_page_offset), + src_node_start, 0, ring, + &from); if (r) goto error; + /* Adjust the offset because amdgpu_map_buffer returns + * start of mapped page + */ + from += src_page_offset; } - if (new_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(new_mem)) { - r = amdgpu_map_buffer(bo, new_mem, cur_pages, - new_start, 1, ring, &to); + if (dst->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(dst->mem)) { + r = amdgpu_map_buffer(dst->bo, dst->mem, + PFN_UP(cur_size + dst_page_offset), + dst_node_start, 1, ring, + &to); if (r) goto error; + to += dst_page_offset; } - r = amdgpu_copy_buffer(ring, from, to, - cur_pages * PAGE_SIZE, - bo->resv, &next, false, true); + r = amdgpu_copy_buffer(ring, from, to, cur_size, + resv, &next, false, true); if (r) goto error; dma_fence_put(fence); fence = next; - num_pages -= cur_pages; - if (!num_pages) + size -= cur_size; + if (!size) break; - old_size -= cur_pages; - if (!old_size) { - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); - old_size = old_mm->size; + src_node_size -= cur_size; + if (!src_node_size) { + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, + src->mem); + src_node_size = (src_mm->size << PAGE_SHIFT); } else { - old_start += cur_pages * PAGE_SIZE; + src_node_start += cur_size; + src_page_offset = src_node_start & (PAGE_SIZE - 1); } - - new_size -= cur_pages; - if (!new_size) { - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); - new_size = new_mm->size; + dst_node_size -= cur_size; + if (!dst_node_size) { + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, + dst->mem); + dst_node_size = (dst_mm->size << PAGE_SHIFT); } else { - new_start += cur_pages * PAGE_SIZE; + dst_node_start += cur_size; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); } } +error: mutex_unlock(&adev->mman.gtt_window_lock); + if (f) + *f = dma_fence_get(fence); + dma_fence_put(fence); + return r; +} + + +static int amdgpu_move_blit(struct ttm_buffer_object *bo, + bool evict, bool no_wait_gpu, + struct ttm_mem_reg *new_mem, + struct ttm_mem_reg *old_mem) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_copy_mem src, dst; + struct dma_fence *fence = NULL; + int r; + + src.bo = bo; + dst.bo = bo; + src.mem = old_mem; + dst.mem = new_mem; + src.offset = 0; + dst.offset = 0; + + r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst, + new_mem->num_pages << PAGE_SHIFT, + bo->resv, &fence); + if (r) + goto error; r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; error: - mutex_unlock(&adev->mman.gtt_window_lock); - if (fence) dma_fence_wait(fence, false); dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 7abae68..2188034 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -58,6 +58,12 @@ struct amdgpu_mman { struct amd_sched_entity entity; }; +struct amdgpu_copy_mem { + struct ttm_buffer_object *bo; + struct ttm_mem_reg *mem; + unsigned long offset; +}; + extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct reservation_object *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint64_t src_data, struct reservation_object *resv, -- 1.9.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>]
* [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> @ 2017-10-06 22:08 ` Harish Kasiviswanathan [not found] ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> 2017-10-09 9:08 ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Christian König 1 sibling, 1 reply; 7+ messages in thread From: Harish Kasiviswanathan @ 2017-10-06 22:08 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497 Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dda4f08..e9bfe9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, } /** + * amdgpu_find_mm_node - Helper function finds the drm_mm_node + * corresponding to @offset. It also modifies the offset to be + * within the drm_mm_node returned + */ +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, + unsigned long *offset) +{ + struct drm_mm_node *mm_node = mem->mm_node; + + while (*offset >= (mm_node->size << PAGE_SHIFT)) { + *offset -= (mm_node->size << PAGE_SHIFT); + ++mm_node; + } + return mm_node; +} + +/** * amdgpu_copy_ttm_mem_to_mem - Helper function for copy * * The function copies @size bytes from {src->mem + src->offset} to @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, return -EINVAL; } - src_mm = src->mem->mm_node; - while (src->offset >= (src_mm->size << PAGE_SHIFT)) { - src->offset -= (src_mm->size << PAGE_SHIFT); - ++src_mm; - } + src_mm = amdgpu_find_mm_node(src->mem, &src->offset); src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + src->offset; src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; src_page_offset = src_node_start & (PAGE_SIZE - 1); - dst_mm = dst->mem->mm_node; - while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { - dst->offset -= (dst_mm->size << PAGE_SHIFT); - ++dst_mm; - } + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + dst->offset; dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, { struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); - struct drm_mm_node *nodes = abo->tbo.mem.mm_node; + struct drm_mm_node *nodes; uint32_t value = 0; int ret = 0; uint64_t pos; @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if (bo->mem.mem_type != TTM_PL_VRAM) return -EIO; - while (offset >= (nodes->size << PAGE_SHIFT)) { - offset -= nodes->size << PAGE_SHIFT; - ++nodes; - } + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); pos = (nodes->start << PAGE_SHIFT) + offset; while (len && pos < adev->mc.mc_vram_size) { -- 1.9.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 7+ messages in thread
[parent not found: <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>]
* Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() [not found] ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> @ 2017-10-09 9:10 ` Christian König [not found] ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> 0 siblings, 1 reply; 7+ messages in thread From: Christian König @ 2017-10-09 9:10 UTC (permalink / raw) To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan: > Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497 > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++-------------- > 1 file changed, 21 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index dda4f08..e9bfe9d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, > } > > /** > + * amdgpu_find_mm_node - Helper function finds the drm_mm_node > + * corresponding to @offset. It also modifies the offset to be > + * within the drm_mm_node returned > + */ > +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, > + unsigned long *offset) > +{ > + struct drm_mm_node *mm_node = mem->mm_node; > + > + while (*offset >= (mm_node->size << PAGE_SHIFT)) { > + *offset -= (mm_node->size << PAGE_SHIFT); > + ++mm_node; > + } > + return mm_node; > +} Please take a look at amdgpu_ttm_io_mem_pfn(). There we assume that each node has the same size and do: > page_offset = do_div(offset, size); > mm += offset; Not 100% sure if that is a good idea, but when we add a common function for this we should use it everywhere. Regards, Christian. > + > +/** > * amdgpu_copy_ttm_mem_to_mem - Helper function for copy > * > * The function copies @size bytes from {src->mem + src->offset} to > @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, > return -EINVAL; > } > > - src_mm = src->mem->mm_node; > - while (src->offset >= (src_mm->size << PAGE_SHIFT)) { > - src->offset -= (src_mm->size << PAGE_SHIFT); > - ++src_mm; > - } > + src_mm = amdgpu_find_mm_node(src->mem, &src->offset); > src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + > src->offset; > src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; > src_page_offset = src_node_start & (PAGE_SIZE - 1); > > - dst_mm = dst->mem->mm_node; > - while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { > - dst->offset -= (dst_mm->size << PAGE_SHIFT); > - ++dst_mm; > - } > + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); > dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + > dst->offset; > dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; > @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > { > struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); > struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); > - struct drm_mm_node *nodes = abo->tbo.mem.mm_node; > + struct drm_mm_node *nodes; > uint32_t value = 0; > int ret = 0; > uint64_t pos; > @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > if (bo->mem.mem_type != TTM_PL_VRAM) > return -EIO; > > - while (offset >= (nodes->size << PAGE_SHIFT)) { > - offset -= nodes->size << PAGE_SHIFT; > - ++nodes; > - } > + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); > pos = (nodes->start << PAGE_SHIFT) + offset; > > while (len && pos < adev->mc.mc_vram_size) { _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>]
* RE: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() [not found] ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> @ 2017-10-10 15:28 ` Kasiviswanathan, Harish 0 siblings, 0 replies; 7+ messages in thread From: Kasiviswanathan, Harish @ 2017-10-10 15:28 UTC (permalink / raw) To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW -----Original Message----- From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com] Sent: Monday, October 09, 2017 5:11 AM To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan: > Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497 > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++-------------- > 1 file changed, 21 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index dda4f08..e9bfe9d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, > } > > /** > + * amdgpu_find_mm_node - Helper function finds the drm_mm_node > + * corresponding to @offset. It also modifies the offset to be > + * within the drm_mm_node returned > + */ > +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, > + unsigned long *offset) > +{ > + struct drm_mm_node *mm_node = mem->mm_node; > + > + while (*offset >= (mm_node->size << PAGE_SHIFT)) { > + *offset -= (mm_node->size << PAGE_SHIFT); > + ++mm_node; > + } > + return mm_node; > +} Please take a look at amdgpu_ttm_io_mem_pfn(). There we assume that each node has the same size and do: > page_offset = do_div(offset, size); > mm += offset; Not 100% sure if that is a good idea, but when we add a common function for this we should use it everywhere. [HK]: Ok I missed this one, probably because it was used in a different way. I will change this function to call amdgpu_find_mm_node(). The optimization (assuming all nodes are same size) will work in the current setup but as you mentioned I am also not sure if it is a good idea. To be safe I will be use the while() loop for now. Regards, Christian. > + > +/** > * amdgpu_copy_ttm_mem_to_mem - Helper function for copy > * > * The function copies @size bytes from {src->mem + src->offset} to > @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, > return -EINVAL; > } > > - src_mm = src->mem->mm_node; > - while (src->offset >= (src_mm->size << PAGE_SHIFT)) { > - src->offset -= (src_mm->size << PAGE_SHIFT); > - ++src_mm; > - } > + src_mm = amdgpu_find_mm_node(src->mem, &src->offset); > src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + > src->offset; > src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; > src_page_offset = src_node_start & (PAGE_SIZE - 1); > > - dst_mm = dst->mem->mm_node; > - while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { > - dst->offset -= (dst_mm->size << PAGE_SHIFT); > - ++dst_mm; > - } > + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); > dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + > dst->offset; > dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; > @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > { > struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); > struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); > - struct drm_mm_node *nodes = abo->tbo.mem.mm_node; > + struct drm_mm_node *nodes; > uint32_t value = 0; > int ret = 0; > uint64_t pos; > @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, > if (bo->mem.mem_type != TTM_PL_VRAM) > return -EIO; > > - while (offset >= (nodes->size << PAGE_SHIFT)) { > - offset -= nodes->size << PAGE_SHIFT; > - ++nodes; > - } > + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); > pos = (nodes->start << PAGE_SHIFT) + offset; > > while (len && pos < adev->mc.mc_vram_size) { _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> 2017-10-06 22:08 ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan @ 2017-10-09 9:08 ` Christian König [not found] ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> 1 sibling, 1 reply; 7+ messages in thread From: Christian König @ 2017-10-09 9:08 UTC (permalink / raw) To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan: > Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports > arbitrary copy size, offsets and two BOs (source & dest.). > > This is useful for KFD Cross Memory Attach feature where data needs to > be copied from BOs from different processes > > v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem() > function parameters to use the struct > > Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159 > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Looks like a nice cleanup to me, with the notes below fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++ > 2 files changed, 132 insertions(+), 49 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 1086f03..dda4f08 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, > return addr; > } > > -static int amdgpu_move_blit(struct ttm_buffer_object *bo, > - bool evict, bool no_wait_gpu, > - struct ttm_mem_reg *new_mem, > - struct ttm_mem_reg *old_mem) > +/** > + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy > + * > + * The function copies @size bytes from {src->mem + src->offset} to > + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a > + * move and different for a BO to BO copy. > + * > + * @f: Returns the last fence if multiple jobs are submitted. > + */ > +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, Please name that amdgpu_ttm_copy_.... to note that the function is part of the TTM subsystem in amdgpu. > + struct amdgpu_copy_mem *src, > + struct amdgpu_copy_mem *dst, > + uint64_t size, > + struct reservation_object *resv, > + struct dma_fence **f) > { > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); > struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > - > - struct drm_mm_node *old_mm, *new_mm; > - uint64_t old_start, old_size, new_start, new_size; > - unsigned long num_pages; > + struct drm_mm_node *src_mm, *dst_mm; > + uint64_t src_node_start, dst_node_start, src_node_size, > + dst_node_size, src_page_offset, dst_page_offset; > struct dma_fence *fence = NULL; > - int r; > - > - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); > + int r = 0; > + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * > + AMDGPU_GPU_PAGE_SIZE); > > if (!ring->ready) { > DRM_ERROR("Trying to move memory with ring turned off.\n"); > return -EINVAL; > } > > - old_mm = old_mem->mm_node; > - old_size = old_mm->size; > - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); > + src_mm = src->mem->mm_node; > + while (src->offset >= (src_mm->size << PAGE_SHIFT)) { > + src->offset -= (src_mm->size << PAGE_SHIFT); > + ++src_mm; > + } > + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + > + src->offset; > + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; > + src_page_offset = src_node_start & (PAGE_SIZE - 1); > > - new_mm = new_mem->mm_node; > - new_size = new_mm->size; > - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); > + dst_mm = dst->mem->mm_node; > + while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { > + dst->offset -= (dst_mm->size << PAGE_SHIFT); > + ++dst_mm; > + } > + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + > + dst->offset; > + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; > + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); > > - num_pages = new_mem->num_pages; > mutex_lock(&adev->mman.gtt_window_lock); > - while (num_pages) { > - unsigned long cur_pages = min(min(old_size, new_size), > - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); > - uint64_t from = old_start, to = new_start; > + > + while (size) { > + unsigned long cur_size; > + uint64_t from = src_node_start, to = dst_node_start; > struct dma_fence *next; > > - if (old_mem->mem_type == TTM_PL_TT && > - !amdgpu_gtt_mgr_is_allocated(old_mem)) { > - r = amdgpu_map_buffer(bo, old_mem, cur_pages, > - old_start, 0, ring, &from); > + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst > + * begins at an offset, then adjust the size accordingly > + */ > + cur_size = min3(min(src_node_size, dst_node_size), size, > + GTT_MAX_BYTES); > + if (cur_size + src_page_offset > GTT_MAX_BYTES || > + cur_size + dst_page_offset > GTT_MAX_BYTES) > + cur_size -= max(src_page_offset, dst_page_offset); > + > + /* Map only what needs to be accessed. Map src to window 0 and > + * dst to window 1 > + */ > + if (src->mem->mem_type == TTM_PL_TT && > + !amdgpu_gtt_mgr_is_allocated(src->mem)) { > + r = amdgpu_map_buffer(src->bo, src->mem, > + PFN_UP(cur_size + src_page_offset), > + src_node_start, 0, ring, > + &from); > if (r) > goto error; > + /* Adjust the offset because amdgpu_map_buffer returns > + * start of mapped page > + */ > + from += src_page_offset; > } > > - if (new_mem->mem_type == TTM_PL_TT && > - !amdgpu_gtt_mgr_is_allocated(new_mem)) { > - r = amdgpu_map_buffer(bo, new_mem, cur_pages, > - new_start, 1, ring, &to); > + if (dst->mem->mem_type == TTM_PL_TT && > + !amdgpu_gtt_mgr_is_allocated(dst->mem)) { > + r = amdgpu_map_buffer(dst->bo, dst->mem, > + PFN_UP(cur_size + dst_page_offset), > + dst_node_start, 1, ring, > + &to); > if (r) > goto error; > + to += dst_page_offset; > } > > - r = amdgpu_copy_buffer(ring, from, to, > - cur_pages * PAGE_SIZE, > - bo->resv, &next, false, true); > + r = amdgpu_copy_buffer(ring, from, to, cur_size, > + resv, &next, false, true); > if (r) > goto error; > > dma_fence_put(fence); > fence = next; > > - num_pages -= cur_pages; > - if (!num_pages) > + size -= cur_size; > + if (!size) > break; > > - old_size -= cur_pages; > - if (!old_size) { > - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); > - old_size = old_mm->size; > + src_node_size -= cur_size; > + if (!src_node_size) { > + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, > + src->mem); > + src_node_size = (src_mm->size << PAGE_SHIFT); > } else { > - old_start += cur_pages * PAGE_SIZE; > + src_node_start += cur_size; > + src_page_offset = src_node_start & (PAGE_SIZE - 1); > } > - > - new_size -= cur_pages; > - if (!new_size) { > - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); > - new_size = new_mm->size; > + dst_node_size -= cur_size; > + if (!dst_node_size) { > + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, > + dst->mem); > + dst_node_size = (dst_mm->size << PAGE_SHIFT); > } else { > - new_start += cur_pages * PAGE_SIZE; > + dst_node_start += cur_size; > + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); > } > } > +error: > mutex_unlock(&adev->mman.gtt_window_lock); > + if (f) > + *f = dma_fence_get(fence); > + dma_fence_put(fence); > + return r; > +} > + > + > +static int amdgpu_move_blit(struct ttm_buffer_object *bo, > + bool evict, bool no_wait_gpu, > + struct ttm_mem_reg *new_mem, > + struct ttm_mem_reg *old_mem) > +{ I think I would rather prefer to change the callers of that function, but not 100% sure. How much change would that be? But not a blocking issue. > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); > + struct amdgpu_copy_mem src, dst; > + struct dma_fence *fence = NULL; > + int r; > + > + src.bo = bo; > + dst.bo = bo; > + src.mem = old_mem; > + dst.mem = new_mem; > + src.offset = 0; > + dst.offset = 0; > + > + r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst, > + new_mem->num_pages << PAGE_SHIFT, > + bo->resv, &fence); > + if (r) > + goto error; > > r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); > dma_fence_put(fence); > return r; > > error: > - mutex_unlock(&adev->mman.gtt_window_lock); > - > if (fence) > dma_fence_wait(fence, false); > dma_fence_put(fence); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index 7abae68..2188034 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -58,6 +58,12 @@ struct amdgpu_mman { > struct amd_sched_entity entity; > }; > > +struct amdgpu_copy_mem { > + struct ttm_buffer_object *bo; > + struct ttm_mem_reg *mem; > + unsigned long offset; > +}; > + > extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; > extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; > > @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, > struct reservation_object *resv, > struct dma_fence **fence, bool direct_submit, > bool vm_needs_flush); > +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, > + struct amdgpu_copy_mem *src, > + struct amdgpu_copy_mem *dst, > + uint64_t size, > + struct reservation_object *resv, > + struct dma_fence **f); > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > uint64_t src_data, > struct reservation_object *resv, _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
[parent not found: <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>]
* RE: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit [not found] ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> @ 2017-10-10 15:20 ` Kasiviswanathan, Harish 0 siblings, 0 replies; 7+ messages in thread From: Kasiviswanathan, Harish @ 2017-10-10 15:20 UTC (permalink / raw) To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW -----Original Message----- From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian König Sent: Monday, October 09, 2017 5:08 AM To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan: > Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports > arbitrary copy size, offsets and two BOs (source & dest.). > > This is useful for KFD Cross Memory Attach feature where data needs to > be copied from BOs from different processes > > v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem() > function parameters to use the struct > > Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159 > Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Looks like a nice cleanup to me, with the notes below fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++ > 2 files changed, 132 insertions(+), 49 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 1086f03..dda4f08 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, > return addr; > } > > -static int amdgpu_move_blit(struct ttm_buffer_object *bo, > - bool evict, bool no_wait_gpu, > - struct ttm_mem_reg *new_mem, > - struct ttm_mem_reg *old_mem) > +/** > + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy > + * > + * The function copies @size bytes from {src->mem + src->offset} to > + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a > + * move and different for a BO to BO copy. > + * > + * @f: Returns the last fence if multiple jobs are submitted. > + */ > +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, Please name that amdgpu_ttm_copy_.... to note that the function is part of the TTM subsystem in amdgpu. [HK] - Will fix the name. > + struct amdgpu_copy_mem *src, > + struct amdgpu_copy_mem *dst, > + uint64_t size, > + struct reservation_object *resv, > + struct dma_fence **f) > { > - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); > struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; > - > - struct drm_mm_node *old_mm, *new_mm; > - uint64_t old_start, old_size, new_start, new_size; > - unsigned long num_pages; > + struct drm_mm_node *src_mm, *dst_mm; > + uint64_t src_node_start, dst_node_start, src_node_size, > + dst_node_size, src_page_offset, dst_page_offset; > struct dma_fence *fence = NULL; > - int r; > - > - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); > + int r = 0; > + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * > + AMDGPU_GPU_PAGE_SIZE); > > if (!ring->ready) { > DRM_ERROR("Trying to move memory with ring turned off.\n"); > return -EINVAL; > } > > - old_mm = old_mem->mm_node; > - old_size = old_mm->size; > - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); > + src_mm = src->mem->mm_node; > + while (src->offset >= (src_mm->size << PAGE_SHIFT)) { > + src->offset -= (src_mm->size << PAGE_SHIFT); > + ++src_mm; > + } > + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + > + src->offset; > + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; > + src_page_offset = src_node_start & (PAGE_SIZE - 1); > > - new_mm = new_mem->mm_node; > - new_size = new_mm->size; > - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); > + dst_mm = dst->mem->mm_node; > + while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { > + dst->offset -= (dst_mm->size << PAGE_SHIFT); > + ++dst_mm; > + } > + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + > + dst->offset; > + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; > + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); > > - num_pages = new_mem->num_pages; > mutex_lock(&adev->mman.gtt_window_lock); > - while (num_pages) { > - unsigned long cur_pages = min(min(old_size, new_size), > - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); > - uint64_t from = old_start, to = new_start; > + > + while (size) { > + unsigned long cur_size; > + uint64_t from = src_node_start, to = dst_node_start; > struct dma_fence *next; > > - if (old_mem->mem_type == TTM_PL_TT && > - !amdgpu_gtt_mgr_is_allocated(old_mem)) { > - r = amdgpu_map_buffer(bo, old_mem, cur_pages, > - old_start, 0, ring, &from); > + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst > + * begins at an offset, then adjust the size accordingly > + */ > + cur_size = min3(min(src_node_size, dst_node_size), size, > + GTT_MAX_BYTES); > + if (cur_size + src_page_offset > GTT_MAX_BYTES || > + cur_size + dst_page_offset > GTT_MAX_BYTES) > + cur_size -= max(src_page_offset, dst_page_offset); > + > + /* Map only what needs to be accessed. Map src to window 0 and > + * dst to window 1 > + */ > + if (src->mem->mem_type == TTM_PL_TT && > + !amdgpu_gtt_mgr_is_allocated(src->mem)) { > + r = amdgpu_map_buffer(src->bo, src->mem, > + PFN_UP(cur_size + src_page_offset), > + src_node_start, 0, ring, > + &from); > if (r) > goto error; > + /* Adjust the offset because amdgpu_map_buffer returns > + * start of mapped page > + */ > + from += src_page_offset; > } > > - if (new_mem->mem_type == TTM_PL_TT && > - !amdgpu_gtt_mgr_is_allocated(new_mem)) { > - r = amdgpu_map_buffer(bo, new_mem, cur_pages, > - new_start, 1, ring, &to); > + if (dst->mem->mem_type == TTM_PL_TT && > + !amdgpu_gtt_mgr_is_allocated(dst->mem)) { > + r = amdgpu_map_buffer(dst->bo, dst->mem, > + PFN_UP(cur_size + dst_page_offset), > + dst_node_start, 1, ring, > + &to); > if (r) > goto error; > + to += dst_page_offset; > } > > - r = amdgpu_copy_buffer(ring, from, to, > - cur_pages * PAGE_SIZE, > - bo->resv, &next, false, true); > + r = amdgpu_copy_buffer(ring, from, to, cur_size, > + resv, &next, false, true); > if (r) > goto error; > > dma_fence_put(fence); > fence = next; > > - num_pages -= cur_pages; > - if (!num_pages) > + size -= cur_size; > + if (!size) > break; > > - old_size -= cur_pages; > - if (!old_size) { > - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); > - old_size = old_mm->size; > + src_node_size -= cur_size; > + if (!src_node_size) { > + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, > + src->mem); > + src_node_size = (src_mm->size << PAGE_SHIFT); > } else { > - old_start += cur_pages * PAGE_SIZE; > + src_node_start += cur_size; > + src_page_offset = src_node_start & (PAGE_SIZE - 1); > } > - > - new_size -= cur_pages; > - if (!new_size) { > - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); > - new_size = new_mm->size; > + dst_node_size -= cur_size; > + if (!dst_node_size) { > + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, > + dst->mem); > + dst_node_size = (dst_mm->size << PAGE_SHIFT); > } else { > - new_start += cur_pages * PAGE_SIZE; > + dst_node_start += cur_size; > + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); > } > } > +error: > mutex_unlock(&adev->mman.gtt_window_lock); > + if (f) > + *f = dma_fence_get(fence); > + dma_fence_put(fence); > + return r; > +} > + > + > +static int amdgpu_move_blit(struct ttm_buffer_object *bo, > + bool evict, bool no_wait_gpu, > + struct ttm_mem_reg *new_mem, > + struct ttm_mem_reg *old_mem) > +{ I think I would rather prefer to change the callers of that function, but not 100% sure. How much change would that be? But not a blocking issue. [HK]: The function is called 3 times in amdgpu_ttm.c. I don't see an elegant way to avoid this function. So for now I am leaving it as it is. > + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); > + struct amdgpu_copy_mem src, dst; > + struct dma_fence *fence = NULL; > + int r; > + > + src.bo = bo; > + dst.bo = bo; > + src.mem = old_mem; > + dst.mem = new_mem; > + src.offset = 0; > + dst.offset = 0; > + > + r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst, > + new_mem->num_pages << PAGE_SHIFT, > + bo->resv, &fence); > + if (r) > + goto error; > > r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); > dma_fence_put(fence); > return r; > > error: > - mutex_unlock(&adev->mman.gtt_window_lock); > - > if (fence) > dma_fence_wait(fence, false); > dma_fence_put(fence); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index 7abae68..2188034 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -58,6 +58,12 @@ struct amdgpu_mman { > struct amd_sched_entity entity; > }; > > +struct amdgpu_copy_mem { > + struct ttm_buffer_object *bo; > + struct ttm_mem_reg *mem; > + unsigned long offset; > +}; > + > extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; > extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; > > @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, > struct reservation_object *resv, > struct dma_fence **fence, bool direct_submit, > bool vm_needs_flush); > +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev, > + struct amdgpu_copy_mem *src, > + struct amdgpu_copy_mem *dst, > + uint64_t size, > + struct reservation_object *resv, > + struct dma_fence **f); > int amdgpu_fill_buffer(struct amdgpu_bo *bo, > uint64_t src_data, > struct reservation_object *resv, _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit @ 2017-10-11 19:49 Harish Kasiviswanathan 0 siblings, 0 replies; 7+ messages in thread From: Harish Kasiviswanathan @ 2017-10-11 19:49 UTC (permalink / raw) To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports arbitrary copy size, offsets and two BOs (source & dest.). This is useful for KFD Cross Memory Attach feature where data needs to be copied from BOs from different processes v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem() function parameters to use the struct v3: Minor function name change Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159 Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++ 2 files changed, 132 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1086f03..c5e1e35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, return addr; } -static int amdgpu_move_blit(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, - struct ttm_mem_reg *new_mem, - struct ttm_mem_reg *old_mem) +/** + * amdgpu_ttm_copy_mem_to_mem - Helper function for copy + * + * The function copies @size bytes from {src->mem + src->offset} to + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a + * move and different for a BO to BO copy. + * + * @f: Returns the last fence if multiple jobs are submitted. + */ +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - - struct drm_mm_node *old_mm, *new_mm; - uint64_t old_start, old_size, new_start, new_size; - unsigned long num_pages; + struct drm_mm_node *src_mm, *dst_mm; + uint64_t src_node_start, dst_node_start, src_node_size, + dst_node_size, src_page_offset, dst_page_offset; struct dma_fence *fence = NULL; - int r; - - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); + int r = 0; + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); if (!ring->ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - old_mm = old_mem->mm_node; - old_size = old_mm->size; - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); + src_mm = src->mem->mm_node; + while (src->offset >= (src_mm->size << PAGE_SHIFT)) { + src->offset -= (src_mm->size << PAGE_SHIFT); + ++src_mm; + } + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + + src->offset; + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; + src_page_offset = src_node_start & (PAGE_SIZE - 1); - new_mm = new_mem->mm_node; - new_size = new_mm->size; - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); + dst_mm = dst->mem->mm_node; + while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { + dst->offset -= (dst_mm->size << PAGE_SHIFT); + ++dst_mm; + } + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + + dst->offset; + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); - num_pages = new_mem->num_pages; mutex_lock(&adev->mman.gtt_window_lock); - while (num_pages) { - unsigned long cur_pages = min(min(old_size, new_size), - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); - uint64_t from = old_start, to = new_start; + + while (size) { + unsigned long cur_size; + uint64_t from = src_node_start, to = dst_node_start; struct dma_fence *next; - if (old_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(old_mem)) { - r = amdgpu_map_buffer(bo, old_mem, cur_pages, - old_start, 0, ring, &from); + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst + * begins at an offset, then adjust the size accordingly + */ + cur_size = min3(min(src_node_size, dst_node_size), size, + GTT_MAX_BYTES); + if (cur_size + src_page_offset > GTT_MAX_BYTES || + cur_size + dst_page_offset > GTT_MAX_BYTES) + cur_size -= max(src_page_offset, dst_page_offset); + + /* Map only what needs to be accessed. Map src to window 0 and + * dst to window 1 + */ + if (src->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(src->mem)) { + r = amdgpu_map_buffer(src->bo, src->mem, + PFN_UP(cur_size + src_page_offset), + src_node_start, 0, ring, + &from); if (r) goto error; + /* Adjust the offset because amdgpu_map_buffer returns + * start of mapped page + */ + from += src_page_offset; } - if (new_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(new_mem)) { - r = amdgpu_map_buffer(bo, new_mem, cur_pages, - new_start, 1, ring, &to); + if (dst->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(dst->mem)) { + r = amdgpu_map_buffer(dst->bo, dst->mem, + PFN_UP(cur_size + dst_page_offset), + dst_node_start, 1, ring, + &to); if (r) goto error; + to += dst_page_offset; } - r = amdgpu_copy_buffer(ring, from, to, - cur_pages * PAGE_SIZE, - bo->resv, &next, false, true); + r = amdgpu_copy_buffer(ring, from, to, cur_size, + resv, &next, false, true); if (r) goto error; dma_fence_put(fence); fence = next; - num_pages -= cur_pages; - if (!num_pages) + size -= cur_size; + if (!size) break; - old_size -= cur_pages; - if (!old_size) { - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); - old_size = old_mm->size; + src_node_size -= cur_size; + if (!src_node_size) { + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, + src->mem); + src_node_size = (src_mm->size << PAGE_SHIFT); } else { - old_start += cur_pages * PAGE_SIZE; + src_node_start += cur_size; + src_page_offset = src_node_start & (PAGE_SIZE - 1); } - - new_size -= cur_pages; - if (!new_size) { - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); - new_size = new_mm->size; + dst_node_size -= cur_size; + if (!dst_node_size) { + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, + dst->mem); + dst_node_size = (dst_mm->size << PAGE_SHIFT); } else { - new_start += cur_pages * PAGE_SIZE; + dst_node_start += cur_size; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); } } +error: mutex_unlock(&adev->mman.gtt_window_lock); + if (f) + *f = dma_fence_get(fence); + dma_fence_put(fence); + return r; +} + + +static int amdgpu_move_blit(struct ttm_buffer_object *bo, + bool evict, bool no_wait_gpu, + struct ttm_mem_reg *new_mem, + struct ttm_mem_reg *old_mem) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_copy_mem src, dst; + struct dma_fence *fence = NULL; + int r; + + src.bo = bo; + dst.bo = bo; + src.mem = old_mem; + dst.mem = new_mem; + src.offset = 0; + dst.offset = 0; + + r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, + new_mem->num_pages << PAGE_SHIFT, + bo->resv, &fence); + if (r) + goto error; r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; error: - mutex_unlock(&adev->mman.gtt_window_lock); - if (fence) dma_fence_wait(fence, false); dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 7abae68..abd4084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -58,6 +58,12 @@ struct amdgpu_mman { struct amd_sched_entity entity; }; +struct amdgpu_copy_mem { + struct ttm_buffer_object *bo; + struct ttm_mem_reg *mem; + unsigned long offset; +}; + extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct reservation_object *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint64_t src_data, struct reservation_object *resv, -- 1.9.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 7+ messages in thread
end of thread, other threads:[~2017-10-11 19:49 UTC | newest] Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2017-10-06 22:08 [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Harish Kasiviswanathan [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> 2017-10-06 22:08 ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan [not found] ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org> 2017-10-09 9:10 ` Christian König [not found] ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> 2017-10-10 15:28 ` Kasiviswanathan, Harish 2017-10-09 9:08 ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Christian König [not found] ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> 2017-10-10 15:20 ` Kasiviswanathan, Harish 2017-10-11 19:49 Harish Kasiviswanathan
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.