All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
@ 2017-10-06 22:08 Harish Kasiviswanathan
       [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Harish Kasiviswanathan @ 2017-10-06 22:08 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
arbitrary copy size, offsets and two BOs (source & dest.).

This is useful for the KFD Cross Memory Attach feature, where data needs
to be copied between BOs belonging to different processes.

v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
function parameters to use the struct

Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  12 +++
 2 files changed, 132 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1086f03..dda4f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 	return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-			    bool evict, bool no_wait_gpu,
-			    struct ttm_mem_reg *new_mem,
-			    struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
-	struct drm_mm_node *old_mm, *new_mm;
-	uint64_t old_start, old_size, new_start, new_size;
-	unsigned long num_pages;
+	struct drm_mm_node *src_mm, *dst_mm;
+	uint64_t src_node_start, dst_node_start, src_node_size,
+		 dst_node_size, src_page_offset, dst_page_offset;
 	struct dma_fence *fence = NULL;
-	int r;
-
-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+	int r = 0;
+	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+					AMDGPU_GPU_PAGE_SIZE);
 
 	if (!ring->ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
 
-	old_mm = old_mem->mm_node;
-	old_size = old_mm->size;
-	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+	src_mm = src->mem->mm_node;
+	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
+		src->offset -= (src_mm->size << PAGE_SHIFT);
+		++src_mm;
+	}
+	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+					     src->offset;
+	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	new_mm = new_mem->mm_node;
-	new_size = new_mm->size;
-	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+	dst_mm = dst->mem->mm_node;
+	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
+		dst->offset -= (dst_mm->size << PAGE_SHIFT);
+		++dst_mm;
+	}
+	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+					     dst->offset;
+	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-	num_pages = new_mem->num_pages;
 	mutex_lock(&adev->mman.gtt_window_lock);
-	while (num_pages) {
-		unsigned long cur_pages = min(min(old_size, new_size),
-					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-		uint64_t from = old_start, to = new_start;
+
+	while (size) {
+		unsigned long cur_size;
+		uint64_t from = src_node_start, to = dst_node_start;
 		struct dma_fence *next;
 
-		if (old_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-					      old_start, 0, ring, &from);
+		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+		 * begins at an offset, then adjust the size accordingly
+		 */
+		cur_size = min3(min(src_node_size, dst_node_size), size,
+				GTT_MAX_BYTES);
+		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+		    cur_size + dst_page_offset > GTT_MAX_BYTES)
+			cur_size -= max(src_page_offset, dst_page_offset);
+
+		/* Map only what needs to be accessed. Map src to window 0 and
+		 * dst to window 1
+		 */
+		if (src->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+			r = amdgpu_map_buffer(src->bo, src->mem,
+					PFN_UP(cur_size + src_page_offset),
+					src_node_start, 0, ring,
+					&from);
 			if (r)
 				goto error;
+			/* Adjust the offset because amdgpu_map_buffer returns
+			 * start of mapped page
+			 */
+			from += src_page_offset;
 		}
 
-		if (new_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-					      new_start, 1, ring, &to);
+		if (dst->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+			r = amdgpu_map_buffer(dst->bo, dst->mem,
+					PFN_UP(cur_size + dst_page_offset),
+					dst_node_start, 1, ring,
+					&to);
 			if (r)
 				goto error;
+			to += dst_page_offset;
 		}
 
-		r = amdgpu_copy_buffer(ring, from, to,
-				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false, true);
+		r = amdgpu_copy_buffer(ring, from, to, cur_size,
+				       resv, &next, false, true);
 		if (r)
 			goto error;
 
 		dma_fence_put(fence);
 		fence = next;
 
-		num_pages -= cur_pages;
-		if (!num_pages)
+		size -= cur_size;
+		if (!size)
 			break;
 
-		old_size -= cur_pages;
-		if (!old_size) {
-			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-			old_size = old_mm->size;
+		src_node_size -= cur_size;
+		if (!src_node_size) {
+			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+							     src->mem);
+			src_node_size = (src_mm->size << PAGE_SHIFT);
 		} else {
-			old_start += cur_pages * PAGE_SIZE;
+			src_node_start += cur_size;
+			src_page_offset = src_node_start & (PAGE_SIZE - 1);
 		}
-
-		new_size -= cur_pages;
-		if (!new_size) {
-			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-			new_size = new_mm->size;
+		dst_node_size -= cur_size;
+		if (!dst_node_size) {
+			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+							     dst->mem);
+			dst_node_size = (dst_mm->size << PAGE_SHIFT);
 		} else {
-			new_start += cur_pages * PAGE_SIZE;
+			dst_node_start += cur_size;
+			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 		}
 	}
+error:
 	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (f)
+		*f = dma_fence_get(fence);
+	dma_fence_put(fence);
+	return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, bool no_wait_gpu,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct amdgpu_copy_mem src, dst;
+	struct dma_fence *fence = NULL;
+	int r;
+
+	src.bo = bo;
+	dst.bo = bo;
+	src.mem = old_mem;
+	dst.mem = new_mem;
+	src.offset = 0;
+	dst.offset = 0;
+
+	r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst,
+				       new_mem->num_pages << PAGE_SHIFT,
+				       bo->resv, &fence);
+	if (r)
+		goto error;
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 7abae68..2188034 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -58,6 +58,12 @@ struct amdgpu_mman {
 	struct amd_sched_entity			entity;
 };
 
+struct amdgpu_copy_mem {
+	struct ttm_buffer_object	*bo;
+	struct ttm_mem_reg		*mem;
+	unsigned long			offset;
+};
+
 extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
 extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
 
@@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       struct reservation_object *resv,
 		       struct dma_fence **fence, bool direct_submit,
 		       bool vm_needs_flush);
+int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint64_t src_data,
 			struct reservation_object *resv,
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
       [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-06 22:08   ` Harish Kasiviswanathan
       [not found]     ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
  2017-10-09  9:08   ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Christian König
  1 sibling, 1 reply; 8+ messages in thread
From: Harish Kasiviswanathan @ 2017-10-06 22:08 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dda4f08..e9bfe9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 }
 
 /**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ *  corresponding to @offset. It also modifies the offset to be
+ *  within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+					       unsigned long *offset)
+{
+	struct drm_mm_node *mm_node = mem->mm_node;
+
+	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+		*offset -= (mm_node->size << PAGE_SHIFT);
+		++mm_node;
+	}
+	return mm_node;
+}
+
+/**
  * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
  *
  * The function copies @size bytes from {src->mem + src->offset} to
@@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	src_mm = src->mem->mm_node;
-	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
-		src->offset -= (src_mm->size << PAGE_SHIFT);
-		++src_mm;
-	}
+	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
 	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
 					     src->offset;
 	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
 	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	dst_mm = dst->mem->mm_node;
-	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
-		dst->offset -= (dst_mm->size << PAGE_SHIFT);
-		++dst_mm;
-	}
+	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
 	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
 					     dst->offset;
 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
@@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 {
 	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	struct drm_mm_node *nodes;
 	uint32_t value = 0;
 	int ret = 0;
 	uint64_t pos;
@@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->mem.mem_type != TTM_PL_VRAM)
 		return -EIO;
 
-	while (offset >= (nodes->size << PAGE_SHIFT)) {
-		offset -= nodes->size << PAGE_SHIFT;
-		++nodes;
-	}
+	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 
 	while (len && pos < adev->mc.mc_vram_size) {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
       [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
  2017-10-06 22:08   ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan
@ 2017-10-09  9:08   ` Christian König
       [not found]     ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 8+ messages in thread
From: Christian König @ 2017-10-09  9:08 UTC (permalink / raw)
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
> arbitrary copy size, offsets and two BOs (source & dest.).
>
> This is useful for KFD Cross Memory Attach feature where data needs to
> be copied from BOs from different processes
>
> v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
> function parameters to use the struct
>
> Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

Looks like a nice cleanup to me; with the notes below fixed, the patch is 
Reviewed-by: Christian König <christian.koenig@amd.com>.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  12 +++
>   2 files changed, 132 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 1086f03..dda4f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
>   	return addr;
>   }
>   
> -static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> -			    bool evict, bool no_wait_gpu,
> -			    struct ttm_mem_reg *new_mem,
> -			    struct ttm_mem_reg *old_mem)
> +/**
> + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> + *
> + * The function copies @size bytes from {src->mem + src->offset} to
> + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
> + * move and different for a BO to BO copy.
> + *
> + * @f: Returns the last fence if multiple jobs are submitted.
> + */
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,

Please name that amdgpu_ttm_copy_.... to note that the function is part 
of the TTM subsystem in amdgpu.

> +			       struct amdgpu_copy_mem *src,
> +			       struct amdgpu_copy_mem *dst,
> +			       uint64_t size,
> +			       struct reservation_object *resv,
> +			       struct dma_fence **f)
>   {
> -	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>   	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -
> -	struct drm_mm_node *old_mm, *new_mm;
> -	uint64_t old_start, old_size, new_start, new_size;
> -	unsigned long num_pages;
> +	struct drm_mm_node *src_mm, *dst_mm;
> +	uint64_t src_node_start, dst_node_start, src_node_size,
> +		 dst_node_size, src_page_offset, dst_page_offset;
>   	struct dma_fence *fence = NULL;
> -	int r;
> -
> -	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
> +	int r = 0;
> +	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
> +					AMDGPU_GPU_PAGE_SIZE);
>   
>   	if (!ring->ready) {
>   		DRM_ERROR("Trying to move memory with ring turned off.\n");
>   		return -EINVAL;
>   	}
>   
> -	old_mm = old_mem->mm_node;
> -	old_size = old_mm->size;
> -	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
> +	src_mm = src->mem->mm_node;
> +	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> +		src->offset -= (src_mm->size << PAGE_SHIFT);
> +		++src_mm;
> +	}
> +	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> +					     src->offset;
> +	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> +	src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   
> -	new_mm = new_mem->mm_node;
> -	new_size = new_mm->size;
> -	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
> +	dst_mm = dst->mem->mm_node;
> +	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> +		dst->offset -= (dst_mm->size << PAGE_SHIFT);
> +		++dst_mm;
> +	}
> +	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> +					     dst->offset;
> +	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> +	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
>   
> -	num_pages = new_mem->num_pages;
>   	mutex_lock(&adev->mman.gtt_window_lock);
> -	while (num_pages) {
> -		unsigned long cur_pages = min(min(old_size, new_size),
> -					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
> -		uint64_t from = old_start, to = new_start;
> +
> +	while (size) {
> +		unsigned long cur_size;
> +		uint64_t from = src_node_start, to = dst_node_start;
>   		struct dma_fence *next;
>   
> -		if (old_mem->mem_type == TTM_PL_TT &&
> -		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
> -			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
> -					      old_start, 0, ring, &from);
> +		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
> +		 * begins at an offset, then adjust the size accordingly
> +		 */
> +		cur_size = min3(min(src_node_size, dst_node_size), size,
> +				GTT_MAX_BYTES);
> +		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
> +		    cur_size + dst_page_offset > GTT_MAX_BYTES)
> +			cur_size -= max(src_page_offset, dst_page_offset);
> +
> +		/* Map only what needs to be accessed. Map src to window 0 and
> +		 * dst to window 1
> +		 */
> +		if (src->mem->mem_type == TTM_PL_TT &&
> +		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
> +			r = amdgpu_map_buffer(src->bo, src->mem,
> +					PFN_UP(cur_size + src_page_offset),
> +					src_node_start, 0, ring,
> +					&from);
>   			if (r)
>   				goto error;
> +			/* Adjust the offset because amdgpu_map_buffer returns
> +			 * start of mapped page
> +			 */
> +			from += src_page_offset;
>   		}
>   
> -		if (new_mem->mem_type == TTM_PL_TT &&
> -		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
> -			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
> -					      new_start, 1, ring, &to);
> +		if (dst->mem->mem_type == TTM_PL_TT &&
> +		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
> +			r = amdgpu_map_buffer(dst->bo, dst->mem,
> +					PFN_UP(cur_size + dst_page_offset),
> +					dst_node_start, 1, ring,
> +					&to);
>   			if (r)
>   				goto error;
> +			to += dst_page_offset;
>   		}
>   
> -		r = amdgpu_copy_buffer(ring, from, to,
> -				       cur_pages * PAGE_SIZE,
> -				       bo->resv, &next, false, true);
> +		r = amdgpu_copy_buffer(ring, from, to, cur_size,
> +				       resv, &next, false, true);
>   		if (r)
>   			goto error;
>   
>   		dma_fence_put(fence);
>   		fence = next;
>   
> -		num_pages -= cur_pages;
> -		if (!num_pages)
> +		size -= cur_size;
> +		if (!size)
>   			break;
>   
> -		old_size -= cur_pages;
> -		if (!old_size) {
> -			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
> -			old_size = old_mm->size;
> +		src_node_size -= cur_size;
> +		if (!src_node_size) {
> +			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
> +							     src->mem);
> +			src_node_size = (src_mm->size << PAGE_SHIFT);
>   		} else {
> -			old_start += cur_pages * PAGE_SIZE;
> +			src_node_start += cur_size;
> +			src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   		}
> -
> -		new_size -= cur_pages;
> -		if (!new_size) {
> -			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
> -			new_size = new_mm->size;
> +		dst_node_size -= cur_size;
> +		if (!dst_node_size) {
> +			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
> +							     dst->mem);
> +			dst_node_size = (dst_mm->size << PAGE_SHIFT);
>   		} else {
> -			new_start += cur_pages * PAGE_SIZE;
> +			dst_node_start += cur_size;
> +			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
>   		}
>   	}
> +error:
>   	mutex_unlock(&adev->mman.gtt_window_lock);
> +	if (f)
> +		*f = dma_fence_get(fence);
> +	dma_fence_put(fence);
> +	return r;
> +}
> +
> +
> +static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> +			    bool evict, bool no_wait_gpu,
> +			    struct ttm_mem_reg *new_mem,
> +			    struct ttm_mem_reg *old_mem)
> +{

I think I would rather prefer to change the callers of that function, 
but not 100% sure.

How much change would that be? But not a blocking issue.

> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> +	struct amdgpu_copy_mem src, dst;
> +	struct dma_fence *fence = NULL;
> +	int r;
> +
> +	src.bo = bo;
> +	dst.bo = bo;
> +	src.mem = old_mem;
> +	dst.mem = new_mem;
> +	src.offset = 0;
> +	dst.offset = 0;
> +
> +	r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst,
> +				       new_mem->num_pages << PAGE_SHIFT,
> +				       bo->resv, &fence);
> +	if (r)
> +		goto error;
>   
>   	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
>   	dma_fence_put(fence);
>   	return r;
>   
>   error:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> -
>   	if (fence)
>   		dma_fence_wait(fence, false);
>   	dma_fence_put(fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 7abae68..2188034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -58,6 +58,12 @@ struct amdgpu_mman {
>   	struct amd_sched_entity			entity;
>   };
>   
> +struct amdgpu_copy_mem {
> +	struct ttm_buffer_object	*bo;
> +	struct ttm_mem_reg		*mem;
> +	unsigned long			offset;
> +};
> +
>   extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
>   extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
>   
> @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>   		       struct reservation_object *resv,
>   		       struct dma_fence **fence, bool direct_submit,
>   		       bool vm_needs_flush);
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
> +			       struct amdgpu_copy_mem *src,
> +			       struct amdgpu_copy_mem *dst,
> +			       uint64_t size,
> +			       struct reservation_object *resv,
> +			       struct dma_fence **f);
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint64_t src_data,
>   			struct reservation_object *resv,


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
       [not found]     ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-09  9:10       ` Christian König
       [not found]         ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2017-10-09  9:10 UTC (permalink / raw)
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++--------------
>   1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index dda4f08..e9bfe9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
>   }
>   
>   /**
> + * amdgpu_find_mm_node - Helper function finds the drm_mm_node
> + *  corresponding to @offset. It also modifies the offset to be
> + *  within the drm_mm_node returned
> + */
> +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> +					       unsigned long *offset)
> +{
> +	struct drm_mm_node *mm_node = mem->mm_node;
> +
> +	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
> +		*offset -= (mm_node->size << PAGE_SHIFT);
> +		++mm_node;
> +	}
> +	return mm_node;
> +}

Please take a look at amdgpu_ttm_io_mem_pfn().

There we assume that each node has the same size and do:
>         page_offset = do_div(offset, size);
>         mm += offset;

Not 100% sure if that is a good idea, but when we add a common function 
for this, we should use it everywhere.

Regards,
Christian.

> +
> +/**
>    * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
>    *
>    * The function copies @size bytes from {src->mem + src->offset} to
> @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
>   		return -EINVAL;
>   	}
>   
> -	src_mm = src->mem->mm_node;
> -	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> -		src->offset -= (src_mm->size << PAGE_SHIFT);
> -		++src_mm;
> -	}
> +	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
>   	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
>   					     src->offset;
>   	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
>   	src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   
> -	dst_mm = dst->mem->mm_node;
> -	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> -		dst->offset -= (dst_mm->size << PAGE_SHIFT);
> -		++dst_mm;
> -	}
> +	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
>   	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
>   					     dst->offset;
>   	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   {
>   	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> -	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> +	struct drm_mm_node *nodes;
>   	uint32_t value = 0;
>   	int ret = 0;
>   	uint64_t pos;
> @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   	if (bo->mem.mem_type != TTM_PL_VRAM)
>   		return -EIO;
>   
> -	while (offset >= (nodes->size << PAGE_SHIFT)) {
> -		offset -= nodes->size << PAGE_SHIFT;
> -		++nodes;
> -	}
> +	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
>   	pos = (nodes->start << PAGE_SHIFT) + offset;
>   
>   	while (len && pos < adev->mc.mc_vram_size) {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit
       [not found]     ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-10-10 15:20       ` Kasiviswanathan, Harish
  0 siblings, 0 replies; 8+ messages in thread
From: Kasiviswanathan, Harish @ 2017-10-10 15:20 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Christian König
Sent: Monday, October 09, 2017 5:08 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit

Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Add more generic function amdgpu_copy_ttm_mem_to_mem() that supports
> arbitrary copy size, offsets and two BOs (source & dest.).
>
> This is useful for KFD Cross Memory Attach feature where data needs to
> be copied from BOs from different processes
>
> v2: Add struct amdgpu_copy_mem and changed amdgpu_copy_ttm_mem_to_mem()
> function parameters to use the struct
>
> Change-Id: I848d541a84a1c2d12827d9dcf6d9054d854b4159
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

Looks like a nice cleanup to me, with the notes below fixed the patch is 
Reviewed-by: Christian König <christian.koenig@amd.com>.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 169 +++++++++++++++++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  12 +++
>   2 files changed, 132 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 1086f03..dda4f08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -289,97 +289,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
>   	return addr;
>   }
>   
> -static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> -			    bool evict, bool no_wait_gpu,
> -			    struct ttm_mem_reg *new_mem,
> -			    struct ttm_mem_reg *old_mem)
> +/**
> + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
> + *
> + * The function copies @size bytes from {src->mem + src->offset} to
> + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
> + * move and different for a BO to BO copy.
> + *
> + * @f: Returns the last fence if multiple jobs are submitted.
> + */
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,

Please name that amdgpu_ttm_copy_.... to note that the function is part 
of the TTM subsystem in amdgpu.

[HK] - Will fix the name.

> +			       struct amdgpu_copy_mem *src,
> +			       struct amdgpu_copy_mem *dst,
> +			       uint64_t size,
> +			       struct reservation_object *resv,
> +			       struct dma_fence **f)
>   {
> -	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>   	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -
> -	struct drm_mm_node *old_mm, *new_mm;
> -	uint64_t old_start, old_size, new_start, new_size;
> -	unsigned long num_pages;
> +	struct drm_mm_node *src_mm, *dst_mm;
> +	uint64_t src_node_start, dst_node_start, src_node_size,
> +		 dst_node_size, src_page_offset, dst_page_offset;
>   	struct dma_fence *fence = NULL;
> -	int r;
> -
> -	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
> +	int r = 0;
> +	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
> +					AMDGPU_GPU_PAGE_SIZE);
>   
>   	if (!ring->ready) {
>   		DRM_ERROR("Trying to move memory with ring turned off.\n");
>   		return -EINVAL;
>   	}
>   
> -	old_mm = old_mem->mm_node;
> -	old_size = old_mm->size;
> -	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
> +	src_mm = src->mem->mm_node;
> +	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> +		src->offset -= (src_mm->size << PAGE_SHIFT);
> +		++src_mm;
> +	}
> +	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
> +					     src->offset;
> +	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
> +	src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   
> -	new_mm = new_mem->mm_node;
> -	new_size = new_mm->size;
> -	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
> +	dst_mm = dst->mem->mm_node;
> +	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> +		dst->offset -= (dst_mm->size << PAGE_SHIFT);
> +		++dst_mm;
> +	}
> +	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
> +					     dst->offset;
> +	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> +	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
>   
> -	num_pages = new_mem->num_pages;
>   	mutex_lock(&adev->mman.gtt_window_lock);
> -	while (num_pages) {
> -		unsigned long cur_pages = min(min(old_size, new_size),
> -					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
> -		uint64_t from = old_start, to = new_start;
> +
> +	while (size) {
> +		unsigned long cur_size;
> +		uint64_t from = src_node_start, to = dst_node_start;
>   		struct dma_fence *next;
>   
> -		if (old_mem->mem_type == TTM_PL_TT &&
> -		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
> -			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
> -					      old_start, 0, ring, &from);
> +		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
> +		 * begins at an offset, then adjust the size accordingly
> +		 */
> +		cur_size = min3(min(src_node_size, dst_node_size), size,
> +				GTT_MAX_BYTES);
> +		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
> +		    cur_size + dst_page_offset > GTT_MAX_BYTES)
> +			cur_size -= max(src_page_offset, dst_page_offset);
> +
> +		/* Map only what needs to be accessed. Map src to window 0 and
> +		 * dst to window 1
> +		 */
> +		if (src->mem->mem_type == TTM_PL_TT &&
> +		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
> +			r = amdgpu_map_buffer(src->bo, src->mem,
> +					PFN_UP(cur_size + src_page_offset),
> +					src_node_start, 0, ring,
> +					&from);
>   			if (r)
>   				goto error;
> +			/* Adjust the offset because amdgpu_map_buffer returns
> +			 * start of mapped page
> +			 */
> +			from += src_page_offset;
>   		}
>   
> -		if (new_mem->mem_type == TTM_PL_TT &&
> -		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
> -			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
> -					      new_start, 1, ring, &to);
> +		if (dst->mem->mem_type == TTM_PL_TT &&
> +		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
> +			r = amdgpu_map_buffer(dst->bo, dst->mem,
> +					PFN_UP(cur_size + dst_page_offset),
> +					dst_node_start, 1, ring,
> +					&to);
>   			if (r)
>   				goto error;
> +			to += dst_page_offset;
>   		}
>   
> -		r = amdgpu_copy_buffer(ring, from, to,
> -				       cur_pages * PAGE_SIZE,
> -				       bo->resv, &next, false, true);
> +		r = amdgpu_copy_buffer(ring, from, to, cur_size,
> +				       resv, &next, false, true);
>   		if (r)
>   			goto error;
>   
>   		dma_fence_put(fence);
>   		fence = next;
>   
> -		num_pages -= cur_pages;
> -		if (!num_pages)
> +		size -= cur_size;
> +		if (!size)
>   			break;
>   
> -		old_size -= cur_pages;
> -		if (!old_size) {
> -			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
> -			old_size = old_mm->size;
> +		src_node_size -= cur_size;
> +		if (!src_node_size) {
> +			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
> +							     src->mem);
> +			src_node_size = (src_mm->size << PAGE_SHIFT);
>   		} else {
> -			old_start += cur_pages * PAGE_SIZE;
> +			src_node_start += cur_size;
> +			src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   		}
> -
> -		new_size -= cur_pages;
> -		if (!new_size) {
> -			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
> -			new_size = new_mm->size;
> +		dst_node_size -= cur_size;
> +		if (!dst_node_size) {
> +			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
> +							     dst->mem);
> +			dst_node_size = (dst_mm->size << PAGE_SHIFT);
>   		} else {
> -			new_start += cur_pages * PAGE_SIZE;
> +			dst_node_start += cur_size;
> +			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
>   		}
>   	}
> +error:
>   	mutex_unlock(&adev->mman.gtt_window_lock);
> +	if (f)
> +		*f = dma_fence_get(fence);
> +	dma_fence_put(fence);
> +	return r;
> +}
> +
> +
> +static int amdgpu_move_blit(struct ttm_buffer_object *bo,
> +			    bool evict, bool no_wait_gpu,
> +			    struct ttm_mem_reg *new_mem,
> +			    struct ttm_mem_reg *old_mem)
> +{

I think I would rather prefer to change the callers of that function, 
but not 100% sure.

How much change would that be? But not a blocking issue.

[HK]: The function is called 3 times in amdgpu_ttm.c. I don't see an elegant way to avoid this function. So for now I am leaving it as it is.

> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
> +	struct amdgpu_copy_mem src, dst;
> +	struct dma_fence *fence = NULL;
> +	int r;
> +
> +	src.bo = bo;
> +	dst.bo = bo;
> +	src.mem = old_mem;
> +	dst.mem = new_mem;
> +	src.offset = 0;
> +	dst.offset = 0;
> +
> +	r = amdgpu_copy_ttm_mem_to_mem(adev, &src, &dst,
> +				       new_mem->num_pages << PAGE_SHIFT,
> +				       bo->resv, &fence);
> +	if (r)
> +		goto error;
>   
>   	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
>   	dma_fence_put(fence);
>   	return r;
>   
>   error:
> -	mutex_unlock(&adev->mman.gtt_window_lock);
> -
>   	if (fence)
>   		dma_fence_wait(fence, false);
>   	dma_fence_put(fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index 7abae68..2188034 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -58,6 +58,12 @@ struct amdgpu_mman {
>   	struct amd_sched_entity			entity;
>   };
>   
> +struct amdgpu_copy_mem {
> +	struct ttm_buffer_object	*bo;
> +	struct ttm_mem_reg		*mem;
> +	unsigned long			offset;
> +};
> +
>   extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
>   extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
>   
> @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>   		       struct reservation_object *resv,
>   		       struct dma_fence **fence, bool direct_submit,
>   		       bool vm_needs_flush);
> +int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
> +			       struct amdgpu_copy_mem *src,
> +			       struct amdgpu_copy_mem *dst,
> +			       uint64_t size,
> +			       struct reservation_object *resv,
> +			       struct dma_fence **f);
>   int amdgpu_fill_buffer(struct amdgpu_bo *bo,
>   			uint64_t src_data,
>   			struct reservation_object *resv,


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
       [not found]         ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-10-10 15:28           ` Kasiviswanathan, Harish
  0 siblings, 0 replies; 8+ messages in thread
From: Kasiviswanathan, Harish @ 2017-10-10 15:28 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



-----Original Message-----
From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com] 
Sent: Monday, October 09, 2017 5:11 AM
To: Kasiviswanathan, Harish <Harish.Kasiviswanathan@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()

Am 07.10.2017 um 00:08 schrieb Harish Kasiviswanathan:
> Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 +++++++++++++++++++--------------
>   1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index dda4f08..e9bfe9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -290,6 +290,23 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
>   }
>   
>   /**
> + * amdgpu_find_mm_node - Helper function finds the drm_mm_node
> + *  corresponding to @offset. It also modifies the offset to be
> + *  within the drm_mm_node returned
> + */
> +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> +					       unsigned long *offset)
> +{
> +	struct drm_mm_node *mm_node = mem->mm_node;
> +
> +	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
> +		*offset -= (mm_node->size << PAGE_SHIFT);
> +		++mm_node;
> +	}
> +	return mm_node;
> +}

Please take a look at amdgpu_ttm_io_mem_pfn().

There we assume that each node has the same size and do:
>         page_offset = do_div(offset, size);
>         mm += offset;

Not 100% sure if that is a good idea, but when we add a common function 
for this we should use it everywhere.

[HK]: Ok I missed this one, probably because it was used in a different way. I will change this function to call amdgpu_find_mm_node().
The optimization (assuming all nodes are the same size) will work in the current setup but as you mentioned I am also not sure if it is a good idea. To be safe I will use the while() loop for now.

Regards,
Christian.

> +
> +/**
>    * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
>    *
>    * The function copies @size bytes from {src->mem + src->offset} to
> @@ -319,21 +336,13 @@ int amdgpu_copy_ttm_mem_to_mem(struct amdgpu_device *adev,
>   		return -EINVAL;
>   	}
>   
> -	src_mm = src->mem->mm_node;
> -	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> -		src->offset -= (src_mm->size << PAGE_SHIFT);
> -		++src_mm;
> -	}
> +	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
>   	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
>   					     src->offset;
>   	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
>   	src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   
> -	dst_mm = dst->mem->mm_node;
> -	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> -		dst->offset -= (dst_mm->size << PAGE_SHIFT);
> -		++dst_mm;
> -	}
> +	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
>   	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
>   					     dst->offset;
>   	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> @@ -1216,7 +1225,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   {
>   	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> -	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> +	struct drm_mm_node *nodes;
>   	uint32_t value = 0;
>   	int ret = 0;
>   	uint64_t pos;
> @@ -1225,10 +1234,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   	if (bo->mem.mem_type != TTM_PL_VRAM)
>   		return -EIO;
>   
> -	while (offset >= (nodes->size << PAGE_SHIFT)) {
> -		offset -= nodes->size << PAGE_SHIFT;
> -		++nodes;
> -	}
> +	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
>   	pos = (nodes->start << PAGE_SHIFT) + offset;
>   
>   	while (len && pos < adev->mc.mc_vram_size) {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
       [not found]     ` <1507751374-11295-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-12  8:08       ` Christian König
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2017-10-12  8:08 UTC (permalink / raw)
  To: Harish Kasiviswanathan, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 11.10.2017 um 21:49 schrieb Harish Kasiviswanathan:
> v2: Use amdgpu_find_mm_node() in amdgpu_ttm_io_mem_pfn()
>
> Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 49 ++++++++++++++++++---------------
>   1 file changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c5e1e35..99b1971 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -290,7 +290,24 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
>   }
>   
>   /**
> - * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
> + * amdgpu_find_mm_node - Helper function finds the drm_mm_node
> + *  corresponding to @offset. It also modifies the offset to be
> + *  within the drm_mm_node returned
> + */
> +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
> +					       unsigned long *offset)
> +{
> +	struct drm_mm_node *mm_node = mem->mm_node;
> +
> +	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
> +		*offset -= (mm_node->size << PAGE_SHIFT);
> +		++mm_node;
> +	}
> +	return mm_node;
> +}
> +
> +/**
> + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
>    *
>    * The function copies @size bytes from {src->mem + src->offset} to
>    * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
> @@ -319,21 +336,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
>   		return -EINVAL;
>   	}
>   
> -	src_mm = src->mem->mm_node;
> -	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
> -		src->offset -= (src_mm->size << PAGE_SHIFT);
> -		++src_mm;
> -	}
> +	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
>   	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
>   					     src->offset;
>   	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
>   	src_page_offset = src_node_start & (PAGE_SIZE - 1);
>   
> -	dst_mm = dst->mem->mm_node;
> -	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
> -		dst->offset -= (dst_mm->size << PAGE_SHIFT);
> -		++dst_mm;
> -	}
> +	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
>   	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
>   					     dst->offset;
>   	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
> @@ -653,13 +662,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
>   static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>   					   unsigned long page_offset)
>   {
> -	struct drm_mm_node *mm = bo->mem.mm_node;
> -	uint64_t size = mm->size;
> -	uint64_t offset = page_offset;
> +	struct drm_mm_node *mm;
> +	unsigned long offset = (page_offset << PAGE_SHIFT);
>   
> -	page_offset = do_div(offset, size);
> -	mm += offset;
> -	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
> +	mm = amdgpu_find_mm_node(&bo->mem, &offset);
> +	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
> +		(offset >> PAGE_SHIFT);
>   }
>   
>   /*
> @@ -1216,7 +1224,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   {
>   	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> -	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
> +	struct drm_mm_node *nodes;
>   	uint32_t value = 0;
>   	int ret = 0;
>   	uint64_t pos;
> @@ -1225,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
>   	if (bo->mem.mem_type != TTM_PL_VRAM)
>   		return -EIO;
>   
> -	while (offset >= (nodes->size << PAGE_SHIFT)) {
> -		offset -= nodes->size << PAGE_SHIFT;
> -		++nodes;
> -	}
> +	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
>   	pos = (nodes->start << PAGE_SHIFT) + offset;
>   
>   	while (len && pos < adev->mc.mc_vram_size) {


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node()
       [not found] ` <1507751374-11295-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-11 19:49   ` Harish Kasiviswanathan
       [not found]     ` <1507751374-11295-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Harish Kasiviswanathan @ 2017-10-11 19:49 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Harish Kasiviswanathan

v2: Use amdgpu_find_mm_node() in amdgpu_ttm_io_mem_pfn()

Change-Id: I12231e18bb60152843cd0e0213ddd0d0e04e7497
Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 49 ++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c5e1e35..99b1971 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -290,7 +290,24 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 }
 
 /**
- * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ *  corresponding to @offset. It also modifies the offset to be
+ *  within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+					       unsigned long *offset)
+{
+	struct drm_mm_node *mm_node = mem->mm_node;
+
+	while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+		*offset -= (mm_node->size << PAGE_SHIFT);
+		++mm_node;
+	}
+	return mm_node;
+}
+
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
  *
  * The function copies @size bytes from {src->mem + src->offset} to
  * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
@@ -319,21 +336,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	src_mm = src->mem->mm_node;
-	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
-		src->offset -= (src_mm->size << PAGE_SHIFT);
-		++src_mm;
-	}
+	src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
 	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
 					     src->offset;
 	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
 	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	dst_mm = dst->mem->mm_node;
-	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
-		dst->offset -= (dst_mm->size << PAGE_SHIFT);
-		++dst_mm;
-	}
+	dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
 	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
 					     dst->offset;
 	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
@@ -653,13 +662,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 					   unsigned long page_offset)
 {
-	struct drm_mm_node *mm = bo->mem.mm_node;
-	uint64_t size = mm->size;
-	uint64_t offset = page_offset;
+	struct drm_mm_node *mm;
+	unsigned long offset = (page_offset << PAGE_SHIFT);
 
-	page_offset = do_div(offset, size);
-	mm += offset;
-	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+	mm = amdgpu_find_mm_node(&bo->mem, &offset);
+	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+		(offset >> PAGE_SHIFT);
 }
 
 /*
@@ -1216,7 +1224,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 {
 	struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
-	struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+	struct drm_mm_node *nodes;
 	uint32_t value = 0;
 	int ret = 0;
 	uint64_t pos;
@@ -1225,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->mem.mem_type != TTM_PL_VRAM)
 		return -EIO;
 
-	while (offset >= (nodes->size << PAGE_SHIFT)) {
-		offset -= nodes->size << PAGE_SHIFT;
-		++nodes;
-	}
+	nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
 	pos = (nodes->start << PAGE_SHIFT) + offset;
 
 	while (len && pos < adev->mc.mc_vram_size) {
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-10-12  8:08 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-06 22:08 [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Harish Kasiviswanathan
     [not found] ` <1507327732-8797-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-06 22:08   ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan
     [not found]     ` <1507327732-8797-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-09  9:10       ` Christian König
     [not found]         ` <7bac471e-759e-d596-67e4-be0173cecabc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-10 15:28           ` Kasiviswanathan, Harish
2017-10-09  9:08   ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_move_blit Christian König
     [not found]     ` <586e28f0-7c99-a991-6167-0eaf56ffb0c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-10 15:20       ` Kasiviswanathan, Harish
2017-10-11 19:49 Harish Kasiviswanathan
     [not found] ` <1507751374-11295-1-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-11 19:49   ` [PATCH v2 2/2] drm/amdgpu: Add amdgpu_find_mm_node() Harish Kasiviswanathan
     [not found]     ` <1507751374-11295-2-git-send-email-Harish.Kasiviswanathan-5C7GfCeVMHo@public.gmane.org>
2017-10-12  8:08       ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.