* [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
@ 2017-10-06 22:08 Harish Kasiviswanathan
[not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Harish Kasiviswanathan @ 2017-10-06 22:08 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan
Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
arbitrary copy size, offsets and two BOs (source & dest.).
This is useful for the KFD Cross Memory Attach feature, where data needs
to be copied between BOs belonging to different processes.
v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
function parameters to use the struct
Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++
2 files changed, 132 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1086f03..dda4f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
return addr;
}
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
- bool evict, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem,
- struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
+ struct amdgpu_copy_mem *src,
+ struct amdgpu_copy_mem *dst,
+ uint64_t size,
+ struct reservation_object *resv,
+ struct dma_fence **f)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
- struct drm_mm_node *old_mm, *new_mm;
- uint64_t old_start, old_size, new_start, new_size;
- unsigned long num_pages;
+ struct drm_mm_node *src_mm, *dst_mm;
+ uint64_t src_node_start, dst_node_start, src_node_size,
+ dst_node_size, src_page_offset, dst_page_offset;
struct dma_fence *fence = NULL;
- int r;
-
- BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+ int r = 0;
+ const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE);
if (!ring->ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
- old_mm = old_mem->mm_node;
- old_size = old_mm->size;
- old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+ src_mm = src->mem->mm_node;
+ while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
+ src->offset -= (src_mm->size << PAGE_SHIFT);
+ ++src_mm;
+ }
+ src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+ src->offset;
+ src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+ src_page_offset = src_node_start & (PAGE_SIZE - 1);
- new_mm = new_mem->mm_node;
- new_size = new_mm->size;
- new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+ dst_mm = dst->mem->mm_node;
+ while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
+ dst->offset -= (dst_mm->size << PAGE_SHIFT);
+ ++dst_mm;
+ }
+ dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+ dst->offset;
+ dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+ dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
- num_pages = new_mem->num_pages;
mutex_lock(&adev->mman.gtt_window_lock);
- while (num_pages) {
- unsigned long cur_pages = min(min(old_size, new_size),
- (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
- uint64_t from = old_start, to = new_start;
+
+ while (size) {
+ unsigned long cur_size;
+ uint64_t from = src_node_start, to = dst_node_start;
struct dma_fence *next;
- if (old_mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_is_allocated(old_mem)) {
- r = amdgpu_map_buffer(bo, old_mem, cur_pages,
- old_start, 0, ring, &from);
+ /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+ * begins at an offset, then adjust the size accordingly
+ */
+ cur_size = min3(min(src_node_size, dst_node_size), size,
+ GTT_MAX_BYTES);
+ if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+ cur_size + dst_page_offset > GTT_MAX_BYTES)
+ cur_size -= max(src_page_offset, dst_page_offset);
+
+ /* Map only what needs to be accessed. Map src to window 0 and
+ * dst to window 1
+ */
+ if (src->mem->mem_type == TTM_PL_TT &&
+ !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+ r = amdgpu_map_buffer(src->bo, src->mem,
+ PFN_UP(cur_size + src_page_offset),
+ src_node_start, 0, ring,
+ &from);
if (r)
goto error;
+ /* Adjust the offset because amdgpu_map_buffer returns
+ * start of mapped page
+ */
+ from += src_page_offset;
}
- if (new_mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_is_allocated(new_mem)) {
- r = amdgpu_map_buffer(bo, new_mem, cur_pages,
- new_start, 1, ring, &to);
+ if (dst->mem->mem_type == TTM_PL_TT &&
+ !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+ r = amdgpu_map_buffer(dst->bo, dst->mem,
+ PFN_UP(cur_size + dst_page_offset),
+ dst_node_start, 1, ring,
+ &to);
if (r)
goto error;
+ to += dst_page_offset;
}
- r = amdgpu_copy_buffer(ring, from, to,
- cur_pages * PAGE_SIZE,
- bo->resv, &next, false, true);
+ r = amdgpu_copy_buffer(ring, from, to, cur_size,
+ resv, &next, false, true);
if (r)
goto error;
dma_fence_put(fence);
fence = next;
- num_pages -= cur_pages;
- if (!num_pages)
+ size -= cur_size;
+ if (!size)
break;
- old_size -= cur_pages;
- if (!old_size) {
- old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
- old_size = old_mm->size;
+ src_node_size -= cur_size;
+ if (!src_node_size) {
+ src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+ src->mem);
+ src_node_size = (src_mm->size << PAGE_SHIFT);
} else {
- old_start += cur_pages * PAGE_SIZE;
+ src_node_start += cur_size;
+ src_page_offset = src_node_start & (PAGE_SIZE - 1);
}
-
- new_size -= cur_pages;
- if (!new_size) {
- new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
- new_size = new_mm->size;
+ dst_node_size -= cur_size;
+ if (!dst_node_size) {
+ dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+ dst->mem);
+ dst_node_size = (dst_mm->size << PAGE_SHIFT);
} else {
- new_start += cur_pages * PAGE_SIZE;
+ dst_node_start += cur_size;
+ dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
}
}
+error:
mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ bool evict, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem,
+ struct ttm_mem_reg *old_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_copy_mem src, dst;
+ struct dma_fence *fence = NULL;
+ int r;
+
+ src.bo = bo;
+ dst.bo = bo;
+ src.mem = old_mem;
+ dst.mem = new_mem;
+ src.offset = 0;
+ dst.offset = 0;
+
+ r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst,
+ new_mem->num_pages << PAGE_SHIFT,
+ bo->resv, &fence);
+ if (r)
+ goto error;
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
dma_fence_put(fence);
return r;
error:
- mutex_unlock(&adev->mman.gtt_window_lock);
-
if (fence)
dma_fence_wait(fence, false);
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 7abae68..2188034 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -58,6 +58,12 @@ struct amdgpu_mman {
struct amd_sched_entity entity;
};
+struct amdgpu_copy_mem {
+ struct ttm_buffer_object *bo;
+ struct ttm_mem_reg *mem;
+ unsigned long offset;
+};
+
extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
@@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
struct reservation_object *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush);
+int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
+ struct amdgpu_copy_mem *src,
+ struct amdgpu_copy_mem *dst,
+ uint64_t size,
+ struct reservation_object *resv,
+ struct dma_fence **f);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint64_t src_data,
struct reservation_object *resv,
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
[not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-06 22:08 ` Harish Kasiviswanathan
[not found] ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-09 9:08 ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Christian König
1 sibling, 1 reply; 8+ messages in thread
From: Harish Kasiviswanathan @ 2017-10-06 22:08 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan
Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++--------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dda4f08..e9bfe9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
}
/**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+ unsigned long *offset)
+{
+ struct drm_mm_node *mm_node = mem->mm_node;
+
+ while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+ *offset -= (mm_node->size << PAGE_SHIFT);
+ ++mm_node;
+ }
+ return mm_node;
+}
+
+/**
* amdgpu_copy_ttm_mem_to_mem - Helper function for copy
*
* The function copies @size bytes from {src->mem + src->offset} to
@@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
return -EINVAL;
}
- src_mm = src->mem->mm_node;
- while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
- src->offset -= (src_mm->size << PAGE_SHIFT);
- ++src_mm;
- }
+ src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
src->offset;
src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
src_page_offset = src_node_start & (PAGE_SIZE - 1);
- dst_mm = dst->mem->mm_node;
- while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
- dst->offset -= (dst_mm->size << PAGE_SHIFT);
- ++dst_mm;
- }
+ dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
dst->offset;
dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
@@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
{
struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
- struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+ struct drm_mm_node *nodes;
uint32_t value = 0;
int ret = 0;
uint64_t pos;
@@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->mem.mem_type != TTM_PL_VRAM)
return -EIO;
- while (offset >= (nodes->size << PAGE_SHIFT)) {
- offset -= nodes->size << PAGE_SHIFT;
- ++nodes;
- }
+ nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
pos = (nodes->start << PAGE_SHIFT) + offset;
while (len && pos < adev->mc.mc_vram_size) {
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
[not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-06 22:08 ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan
@ 2017-10-09 9:08 ` Christian König
[not found] ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
1 sibling, 1 reply; 8+ messages in thread
From: Christian König @ 2017-10-09 9:08 UTC (permalink / raw)
To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
> arbitrary copy size, offsets and two BOs (source & dest.).
>
> This is useful for KFD Cross Memory Attach feature where data needs to
> be copied from BOs from different processes
>
> v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
> function parameters to use the struct
>
> Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Looks like a nice cleanup to me, with the notes below fixed the patch is
Reviewed-by: Christian König <christian.koenig@amd.com>.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++
> 2 files changed, 132 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 1086f03..dda4f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
> return addr;
> }
>
> -static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> - bool evict, bool no_wait_gpu,
> - struct ttm_mem_reg *new_mem,
> - struct ttm_mem_reg *old_mem)
> +/**
> + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> + *
> + * The function copies @size bytes from {src->mem + src->offset} to
> + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
> + * move and different for a BO to BO copy.
> + *
> + * @f: Returns the last fence if multiple jobs are submitted.
> + */
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
Please name that amdgpu_ttm_copy_.... to note that the function is part
of the TTM subsystem in amdgpu.
> + struct amdgpu_copy_mem *src,
> + struct amdgpu_copy_mem *dst,
> + uint64_t size,
> + struct reservation_object *resv,
> + struct dma_fence **f)
> {
> - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -
> - struct drm_mm_node *old_mm, *new_mm;
> - uint64_t old_start, old_size, new_start, new_size;
> - unsigned long num_pages;
> + struct drm_mm_node *src_mm, *dst_mm;
> + uint64_t src_node_start, dst_node_start, src_node_size,
> + dst_node_size, src_page_offset, dst_page_offset;
> struct dma_fence *fence = NULL;
> - int r;
> -
> - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
> + int r = 0;
> + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
> + AMDGPU_GPU_PAGE_SIZE);
>
> if (!ring->ready) {
> DRM_ERROR("Trying to move memory with ring turned off.\n");
> return -EINVAL;
> }
>
> - old_mm = old_mem->mm_node;
> - old_size = old_mm->size;
> - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
> + src_mm = src->mem->mm_node;
> + while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> + src->offset -= (src_mm->size << PAGE_SHIFT);
> + ++src_mm;
> + }
> + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> + src->offset;
> + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> + src_page_offset = src_node_start & (PAGE_SIZE - 1);
>
> - new_mm = new_mem->mm_node;
> - new_size = new_mm->size;
> - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
> + dst_mm = dst->mem->mm_node;
> + while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> + dst->offset -= (dst_mm->size << PAGE_SHIFT);
> + ++dst_mm;
> + }
> + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> + dst->offset;
> + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> + dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
>
> - num_pages = new_mem->num_pages;
> mutex_lock(&adev->mman.gtt_window_lock);
> - while (num_pages) {
> - unsigned long cur_pages = min(min(old_size, new_size),
> - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
> - uint64_t from = old_start, to = new_start;
> +
> + while (size) {
> + unsigned long cur_size;
> + uint64_t from = src_node_start, to = dst_node_start;
> struct dma_fence *next;
>
> - if (old_mem->mem_type == TTM_PL_TT &&
> - !amdgpu_gtt_mgr_is_allocated(old_mem)) {
> - r = amdgpu_map_buffer(bo, old_mem, cur_pages,
> - old_start, 0, ring, &from);
> + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
> + * begins at an offset, then adjust the size accordingly
> + */
> + cur_size = min3(min(src_node_size, dst_node_size), size,
> + GTT_MAX_BYTES);
> + if (cur_size + src_page_offset > GTT_MAX_BYTES ||
> + cur_size + dst_page_offset > GTT_MAX_BYTES)
> + cur_size -= max(src_page_offset, dst_page_offset);
> +
> + /* Map only what needs to be accessed. Map src to window 0 and
> + * dst to window 1
> + */
> + if (src->mem->mem_type == TTM_PL_TT &&
> + !amdgpu_gtt_mgr_is_allocated(src->mem)) {
> + r = amdgpu_map_buffer(src->bo, src->mem,
> + PFN_UP(cur_size + src_page_offset),
> + src_node_start, 0, ring,
> + &from);
> if (r)
> goto error;
> + /* Adjust the offset because amdgpu_map_buffer returns
> + * start of mapped page
> + */
> + from += src_page_offset;
> }
>
> - if (new_mem->mem_type == TTM_PL_TT &&
> - !amdgpu_gtt_mgr_is_allocated(new_mem)) {
> - r = amdgpu_map_buffer(bo, new_mem, cur_pages,
> - new_start, 1, ring, &to);
> + if (dst->mem->mem_type == TTM_PL_TT &&
> + !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
> + r = amdgpu_map_buffer(dst->bo, dst->mem,
> + PFN_UP(cur_size + dst_page_offset),
> + dst_node_start, 1, ring,
> + &to);
> if (r)
> goto error;
> + to += dst_page_offset;
> }
>
> - r = amdgpu_copy_buffer(ring, from, to,
> - cur_pages * PAGE_SIZE,
> - bo->resv, &next, false, true);
> + r = amdgpu_copy_buffer(ring, from, to, cur_size,
> + resv, &next, false, true);
> if (r)
> goto error;
>
> dma_fence_put(fence);
> fence = next;
>
> - num_pages -= cur_pages;
> - if (!num_pages)
> + size -= cur_size;
> + if (!size)
> break;
>
> - old_size -= cur_pages;
> - if (!old_size) {
> - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
> - old_size = old_mm->size;
> + src_node_size -= cur_size;
> + if (!src_node_size) {
> + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
> + src->mem);
> + src_node_size = (src_mm->size << PAGE_SHIFT);
> } else {
> - old_start += cur_pages * PAGE_SIZE;
> + src_node_start += cur_size;
> + src_page_offset = src_node_start & (PAGE_SIZE - 1);
> }
> -
> - new_size -= cur_pages;
> - if (!new_size) {
> - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
> - new_size = new_mm->size;
> + dst_node_size -= cur_size;
> + if (!dst_node_size) {
> + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
> + dst->mem);
> + dst_node_size = (dst_mm->size << PAGE_SHIFT);
> } else {
> - new_start += cur_pages * PAGE_SIZE;
> + dst_node_start += cur_size;
> + dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
> }
> }
> +error:
> mutex_unlock(&adev->mman.gtt_window_lock);
> + if (f)
> + *f = dma_fence_get(fence);
> + dma_fence_put(fence);
> + return r;
> +}
> +
> +
> +static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> + bool evict, bool no_wait_gpu,
> + struct ttm_mem_reg *new_mem,
> + struct ttm_mem_reg *old_mem)
> +{
I think I would rather prefer to change the callers of that function,
but not 100% sure.
How much change would that be? But not a blocking issue.
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> + struct amdgpu_copy_mem src, dst;
> + struct dma_fence *fence = NULL;
> + int r;
> +
> + src.bo = bo;
> + dst.bo = bo;
> + src.mem = old_mem;
> + dst.mem = new_mem;
> + src.offset = 0;
> + dst.offset = 0;
> +
> + r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst,
> + new_mem->num_pages << PAGE_SHIFT,
> + bo->resv, &fence);
> + if (r)
> + goto error;
>
> r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
> dma_fence_put(fence);
> return r;
>
> error:
> - mutex_unlock(&adev->mman.gtt_window_lock);
> -
> if (fence)
> dma_fence_wait(fence, false);
> dma_fence_put(fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 7abae68..2188034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -58,6 +58,12 @@ struct amdgpu_mman {
> struct amd_sched_entity entity;
> };
>
> +struct amdgpu_copy_mem {
> + struct ttm_buffer_object *bo;
> + struct ttm_mem_reg *mem;
> + unsigned long offset;
> +};
> +
> extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
> extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
>
> @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> struct reservation_object *resv,
> struct dma_fence **fence, bool direct_submit,
> bool vm_needs_flush);
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
> + struct amdgpu_copy_mem *src,
> + struct amdgpu_copy_mem *dst,
> + uint64_t size,
> + struct reservation_object *resv,
> + struct dma_fence **f);
> int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> uint64_t src_data,
> struct reservation_object *resv,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
[not found] ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-09 9:10 ` Christian König
[not found] ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2017-10-09 9:10 UTC (permalink / raw)
To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index dda4f08..e9bfe9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
> }
>
> /**
> + * amdgpu_find_mm_node - Helper function finds the drm_mm_node
> + * corresponding to @offset. It also modifies the offset to be
> + * within the drm_mm_node returned
> + */
> +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> + unsigned long *offset)
> +{
> + struct drm_mm_node *mm_node = mem->mm_node;
> +
> + while (*offset >= (mm_node->size << PAGE_SHIFT)) {
> + *offset -= (mm_node->size << PAGE_SHIFT);
> + ++mm_node;
> + }
> + return mm_node;
> +}
Please take a look at amdgpu_ttm_io_mem_pfn().
There we assume that each node has the same size and do:
> page_offset = do_div(offset, size);
> mm += offset;
Not 100% sure if that is a good idea, but when we add a common function
for this we should use it everywhere.
Regards,
Christian.
> +
> +/**
> * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> *
> * The function copies @size bytes from {src->mem + src->offset} to
> @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
> return -EINVAL;
> }
>
> - src_mm = src->mem->mm_node;
> - while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> - src->offset -= (src_mm->size << PAGE_SHIFT);
> - ++src_mm;
> - }
> + src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
> src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> src->offset;
> src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> src_page_offset = src_node_start & (PAGE_SIZE - 1);
>
> - dst_mm = dst->mem->mm_node;
> - while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> - dst->offset -= (dst_mm->size << PAGE_SHIFT);
> - ++dst_mm;
> - }
> + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
> dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> dst->offset;
> dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> {
> struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> - struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> + struct drm_mm_node *nodes;
> uint32_t value = 0;
> int ret = 0;
> uint64_t pos;
> @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> if (bo->mem.mem_type != TTM_PL_VRAM)
> return -EIO;
>
> - while (offset >= (nodes->size << PAGE_SHIFT)) {
> - offset -= nodes->size << PAGE_SHIFT;
> - ++nodes;
> - }
> + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
> pos = (nodes->start << PAGE_SHIFT) + offset;
>
> while (len && pos < adev->mc.mc_vram_size) {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
[not found] ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-10-10 15:20 ` Kasiviswanathan, Harish
0 siblings, 0 replies; 8+ messages in thread
From: Kasiviswanathan, Harish @ 2017-10-10 15:20 UTC (permalink / raw)
To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian König
Sent: Monday, October 09, 2017 5:08 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
> arbitrary copy size, offsets and two BOs (source & dest.).
>
> This is useful for KFD Cross Memory Attach feature where data needs to
> be copied from BOs from different processes
>
> v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
> function parameters to use the struct
>
> Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Looks like a nice cleanup to me, with the notes below fixed the patch is
Reviewed-by: Christian König <christian.koenig@amd.com>.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 12 +++
> 2 files changed, 132 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 1086f03..dda4f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
> return addr;
> }
>
> -static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> - bool evict, bool no_wait_gpu,
> - struct ttm_mem_reg *new_mem,
> - struct ttm_mem_reg *old_mem)
> +/**
> + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> + *
> + * The function copies @size bytes from {src->mem + src->offset} to
> + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
> + * move and different for a BO to BO copy.
> + *
> + * @f: Returns the last fence if multiple jobs are submitted.
> + */
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
Please name that amdgpu_ttm_copy_.... to note that the function is part
of the TTM subsystem in amdgpu.
[HK] - Will fix the name.
> + struct amdgpu_copy_mem *src,
> + struct amdgpu_copy_mem *dst,
> + uint64_t size,
> + struct reservation_object *resv,
> + struct dma_fence **f)
> {
> - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -
> - struct drm_mm_node *old_mm, *new_mm;
> - uint64_t old_start, old_size, new_start, new_size;
> - unsigned long num_pages;
> + struct drm_mm_node *src_mm, *dst_mm;
> + uint64_t src_node_start, dst_node_start, src_node_size,
> + dst_node_size, src_page_offset, dst_page_offset;
> struct dma_fence *fence = NULL;
> - int r;
> -
> - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
> + int r = 0;
> + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
> + AMDGPU_GPU_PAGE_SIZE);
>
> if (!ring->ready) {
> DRM_ERROR("Trying to move memory with ring turned off.\n");
> return -EINVAL;
> }
>
> - old_mm = old_mem->mm_node;
> - old_size = old_mm->size;
> - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
> + src_mm = src->mem->mm_node;
> + while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> + src->offset -= (src_mm->size << PAGE_SHIFT);
> + ++src_mm;
> + }
> + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> + src->offset;
> + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> + src_page_offset = src_node_start & (PAGE_SIZE - 1);
>
> - new_mm = new_mem->mm_node;
> - new_size = new_mm->size;
> - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
> + dst_mm = dst->mem->mm_node;
> + while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> + dst->offset -= (dst_mm->size << PAGE_SHIFT);
> + ++dst_mm;
> + }
> + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> + dst->offset;
> + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> + dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
>
> - num_pages = new_mem->num_pages;
> mutex_lock(&adev->mman.gtt_window_lock);
> - while (num_pages) {
> - unsigned long cur_pages = min(min(old_size, new_size),
> - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
> - uint64_t from = old_start, to = new_start;
> +
> + while (size) {
> + unsigned long cur_size;
> + uint64_t from = src_node_start, to = dst_node_start;
> struct dma_fence *next;
>
> - if (old_mem->mem_type == TTM_PL_TT &&
> - !amdgpu_gtt_mgr_is_allocated(old_mem)) {
> - r = amdgpu_map_buffer(bo, old_mem, cur_pages,
> - old_start, 0, ring, &from);
> + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
> + * begins at an offset, then adjust the size accordingly
> + */
> + cur_size = min3(min(src_node_size, dst_node_size), size,
> + GTT_MAX_BYTES);
> + if (cur_size + src_page_offset > GTT_MAX_BYTES ||
> + cur_size + dst_page_offset > GTT_MAX_BYTES)
> + cur_size -= max(src_page_offset, dst_page_offset);
> +
> + /* Map only what needs to be accessed. Map src to window 0 and
> + * dst to window 1
> + */
> + if (src->mem->mem_type == TTM_PL_TT &&
> + !amdgpu_gtt_mgr_is_allocated(src->mem)) {
> + r = amdgpu_map_buffer(src->bo, src->mem,
> + PFN_UP(cur_size + src_page_offset),
> + src_node_start, 0, ring,
> + &from);
> if (r)
> goto error;
> + /* Adjust the offset because amdgpu_map_buffer returns
> + * start of mapped page
> + */
> + from += src_page_offset;
> }
>
> - if (new_mem->mem_type == TTM_PL_TT &&
> - !amdgpu_gtt_mgr_is_allocated(new_mem)) {
> - r = amdgpu_map_buffer(bo, new_mem, cur_pages,
> - new_start, 1, ring, &to);
> + if (dst->mem->mem_type == TTM_PL_TT &&
> + !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
> + r = amdgpu_map_buffer(dst->bo, dst->mem,
> + PFN_UP(cur_size + dst_page_offset),
> + dst_node_start, 1, ring,
> + &to);
> if (r)
> goto error;
> + to += dst_page_offset;
> }
>
> - r = amdgpu_copy_buffer(ring, from, to,
> - cur_pages * PAGE_SIZE,
> - bo->resv, &next, false, true);
> + r = amdgpu_copy_buffer(ring, from, to, cur_size,
> + resv, &next, false, true);
> if (r)
> goto error;
>
> dma_fence_put(fence);
> fence = next;
>
> - num_pages -= cur_pages;
> - if (!num_pages)
> + size -= cur_size;
> + if (!size)
> break;
>
> - old_size -= cur_pages;
> - if (!old_size) {
> - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
> - old_size = old_mm->size;
> + src_node_size -= cur_size;
> + if (!src_node_size) {
> + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
> + src->mem);
> + src_node_size = (src_mm->size << PAGE_SHIFT);
> } else {
> - old_start += cur_pages * PAGE_SIZE;
> + src_node_start += cur_size;
> + src_page_offset = src_node_start & (PAGE_SIZE - 1);
> }
> -
> - new_size -= cur_pages;
> - if (!new_size) {
> - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
> - new_size = new_mm->size;
> + dst_node_size -= cur_size;
> + if (!dst_node_size) {
> + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
> + dst->mem);
> + dst_node_size = (dst_mm->size << PAGE_SHIFT);
> } else {
> - new_start += cur_pages * PAGE_SIZE;
> + dst_node_start += cur_size;
> + dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
> }
> }
> +error:
> mutex_unlock(&adev->mman.gtt_window_lock);
> + if (f)
> + *f = dma_fence_get(fence);
> + dma_fence_put(fence);
> + return r;
> +}
> +
> +
> +static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> + bool evict, bool no_wait_gpu,
> + struct ttm_mem_reg *new_mem,
> + struct ttm_mem_reg *old_mem)
> +{
I think I would rather prefer to change the callers of that function,
but not 100% sure.
How much change would that be? But not a blocking issue.
[HK]: The function is called 3 times in amdgpu_ttm.c. I don't see an elegant way to avoid this function. So for now I am leaving it as it is.
> + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> + struct amdgpu_copy_mem src, dst;
> + struct dma_fence *fence = NULL;
> + int r;
> +
> + src.bo = bo;
> + dst.bo = bo;
> + src.mem = old_mem;
> + dst.mem = new_mem;
> + src.offset = 0;
> + dst.offset = 0;
> +
> + r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst,
> + new_mem->num_pages << PAGE_SHIFT,
> + bo->resv, &fence);
> + if (r)
> + goto error;
>
> r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
> dma_fence_put(fence);
> return r;
>
> error:
> - mutex_unlock(&adev->mman.gtt_window_lock);
> -
> if (fence)
> dma_fence_wait(fence, false);
> dma_fence_put(fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 7abae68..2188034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -58,6 +58,12 @@ struct amdgpu_mman {
> struct amd_sched_entity entity;
> };
>
> +struct amdgpu_copy_mem {
> + struct ttm_buffer_object *bo;
> + struct ttm_mem_reg *mem;
> + unsigned long offset;
> +};
> +
> extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
> extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
>
> @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
> struct reservation_object *resv,
> struct dma_fence **fence, bool direct_submit,
> bool vm_needs_flush);
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
> + struct amdgpu_copy_mem *src,
> + struct amdgpu_copy_mem *dst,
> + uint64_t size,
> + struct reservation_object *resv,
> + struct dma_fence **f);
> int amdgpu_fill_buffer(struct amdgpu_bo *bo,
> uint64_t src_data,
> struct reservation_object *resv,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* RE: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
[not found] ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-10-10 15:28 ` Kasiviswanathan, Harish
0 siblings, 0 replies; 8+ messages in thread
From: Kasiviswanathan, Harish @ 2017-10-10 15:28 UTC (permalink / raw)
To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
-----Original Message-----
From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
Sent: Monday, October 09, 2017 5:11 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index dda4f08..e9bfe9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
> }
>
> /**
> + * amdgpu_find_mm_node - Helper function finds the drm_mm_node
> + * corresponding to @offset. It also modifies the offset to be
> + * within the drm_mm_node returned
> + */
> +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> + unsigned long *offset)
> +{
> + struct drm_mm_node *mm_node = mem->mm_node;
> +
> + while (*offset >= (mm_node->size << PAGE_SHIFT)) {
> + *offset -= (mm_node->size << PAGE_SHIFT);
> + ++mm_node;
> + }
> + return mm_node;
> +}
Please take a look at amdgpu_ttm_io_mem_pfn().
There we assume that each node has the same size and do:
> page_offset = do_div(offset, size);
> mm += offset;
Not 100% sure if that is a good idea, but when we add a common function
for this we should use it everywhere.
[HK]: Ok I missed this one, probably because it was used in a different way. I will change this function to call amdgpu_find_mm_node().
The optimization (assuming all nodes are same size) will work in the current setup but as you mentioned I am also not sure if it is a good idea. To be safe I will use the while() loop for now.
Regards,
Christian.
> +
> +/**
> * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> *
> * The function copies @size bytes from {src->mem + src->offset} to
> @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
> return -EINVAL;
> }
>
> - src_mm = src->mem->mm_node;
> - while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> - src->offset -= (src_mm->size << PAGE_SHIFT);
> - ++src_mm;
> - }
> + src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
> src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> src->offset;
> src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> src_page_offset = src_node_start & (PAGE_SIZE - 1);
>
> - dst_mm = dst->mem->mm_node;
> - while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> - dst->offset -= (dst_mm->size << PAGE_SHIFT);
> - ++dst_mm;
> - }
> + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
> dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> dst->offset;
> dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> {
> struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> - struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> + struct drm_mm_node *nodes;
> uint32_t value = 0;
> int ret = 0;
> uint64_t pos;
> @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> if (bo->mem.mem_type != TTM_PL_VRAM)
> return -EIO;
>
> - while (offset >= (nodes->size << PAGE_SHIFT)) {
> - offset -= nodes->size << PAGE_SHIFT;
> - ++nodes;
> - }
> + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
> pos = (nodes->start << PAGE_SHIFT) + offset;
>
> while (len && pos < adev->mc.mc_vram_size) {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
[not found] ` <1507751374-11295-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-12 8:08 ` Christian König
0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2017-10-12 8:08 UTC (permalink / raw)
To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 11.10.2017 um 21:49 schrieb Harish Kasiviswanathan:
> v2: Use amdgpu_find_mm_node() in amdgpu_ttm_io_mem_pfn()
>
> Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 49 ++++++++++++++++++---------------
> 1 file changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c5e1e35..99b1971 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -290,7 +290,24 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
> }
>
> /**
> - * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
> + * amdgpu_find_mm_node - Helper function finds the drm_mm_node
> + * corresponding to @offset. It also modifies the offset to be
> + * within the drm_mm_node returned
> + */
> +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> + unsigned long *offset)
> +{
> + struct drm_mm_node *mm_node = mem->mm_node;
> +
> + while (*offset >= (mm_node->size << PAGE_SHIFT)) {
> + *offset -= (mm_node->size << PAGE_SHIFT);
> + ++mm_node;
> + }
> + return mm_node;
> +}
> +
> +/**
> + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> *
> * The function copies @size bytes from {src->mem + src->offset} to
> * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
> @@ -319,21 +336,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
> return -EINVAL;
> }
>
> - src_mm = src->mem->mm_node;
> - while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> - src->offset -= (src_mm->size << PAGE_SHIFT);
> - ++src_mm;
> - }
> + src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
> src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> src->offset;
> src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> src_page_offset = src_node_start & (PAGE_SIZE - 1);
>
> - dst_mm = dst->mem->mm_node;
> - while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> - dst->offset -= (dst_mm->size << PAGE_SHIFT);
> - ++dst_mm;
> - }
> + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
> dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> dst->offset;
> dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> @@ -653,13 +662,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
> static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
> unsigned long page_offset)
> {
> - struct drm_mm_node *mm = bo->mem.mm_node;
> - uint64_t size = mm->size;
> - uint64_t offset = page_offset;
> + struct drm_mm_node *mm;
> + unsigned long offset = (page_offset << PAGE_SHIFT);
>
> - page_offset = do_div(offset, size);
> - mm += offset;
> - return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
> + mm = amdgpu_find_mm_node(&bo->mem, &offset);
> + return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
> + (offset >> PAGE_SHIFT);
> }
>
> /*
> @@ -1216,7 +1224,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> {
> struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
> struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> - struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> + struct drm_mm_node *nodes;
> uint32_t value = 0;
> int ret = 0;
> uint64_t pos;
> @@ -1225,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
> if (bo->mem.mem_type != TTM_PL_VRAM)
> return -EIO;
>
> - while (offset >= (nodes->size << PAGE_SHIFT)) {
> - offset -= nodes->size << PAGE_SHIFT;
> - ++nodes;
> - }
> + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
> pos = (nodes->start << PAGE_SHIFT) + offset;
>
> while (len && pos < adev->mc.mc_vram_size) {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
[not found] ` <1507751374-11295-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-11 19:49 ` Harish Kasiviswanathan
[not found] ` <1507751374-11295-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 8+ messages in thread
From: Harish Kasiviswanathan @ 2017-10-11 19:49 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan
v2: Use amdgpu_find_mm_node() in amdgpu_ttm_io_mem_pfn()
Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 49 ++++++++++++++++++---------------
1 file changed, 27 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5e1e35..99b1971 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -290,7 +290,24 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
}
/**
- * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+ unsigned long *offset)
+{
+ struct drm_mm_node *mm_node = mem->mm_node;
+
+ while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+ *offset -= (mm_node->size << PAGE_SHIFT);
+ ++mm_node;
+ }
+ return mm_node;
+}
+
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
*
* The function copies @size bytes from {src->mem + src->offset} to
* {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
@@ -319,21 +336,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
return -EINVAL;
}
- src_mm = src->mem->mm_node;
- while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
- src->offset -= (src_mm->size << PAGE_SHIFT);
- ++src_mm;
- }
+ src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
src->offset;
src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
src_page_offset = src_node_start & (PAGE_SIZE - 1);
- dst_mm = dst->mem->mm_node;
- while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
- dst->offset -= (dst_mm->size << PAGE_SHIFT);
- ++dst_mm;
- }
+ dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
dst->offset;
dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
@@ -653,13 +662,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
- struct drm_mm_node *mm = bo->mem.mm_node;
- uint64_t size = mm->size;
- uint64_t offset = page_offset;
+ struct drm_mm_node *mm;
+ unsigned long offset = (page_offset << PAGE_SHIFT);
- page_offset = do_div(offset, size);
- mm += offset;
- return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+ mm = amdgpu_find_mm_node(&bo->mem, &offset);
+ return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+ (offset >> PAGE_SHIFT);
}
/*
@@ -1216,7 +1224,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
{
struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
- struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+ struct drm_mm_node *nodes;
uint32_t value = 0;
int ret = 0;
uint64_t pos;
@@ -1225,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->mem.mem_type != TTM_PL_VRAM)
return -EIO;
- while (offset >= (nodes->size << PAGE_SHIFT)) {
- offset -= nodes->size << PAGE_SHIFT;
- ++nodes;
- }
+ nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
pos = (nodes->start << PAGE_SHIFT) + offset;
while (len && pos < adev->mc.mc_vram_size) {
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2017-10-12 8:08 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-06 22:08 [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Harish Kasiviswanathan
[not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-06 22:08 ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan
[not found] ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-09 9:10 ` Christian König
[not found] ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-10 15:28 ` Kasiviswanathan, Harish
2017-10-09 9:08 ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Christian König
[not found] ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-10 15:20 ` Kasiviswanathan, Harish
2017-10-11 19:49 Harish Kasiviswanathan
[not found] ` <1507751374-11295-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-11 19:49 ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan
[not found] ` <1507751374-11295-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-12 8:08 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.