* [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
@ 2017-11-29  0:20 Samuel Li
       [not found] ` <1511914846-25292-1-git-send-email-Samuel.Li-5C7GfCeVMHo@public.gmane.org>
From: Samuel Li @ 2017-11-29  0:20 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This improves CPU read performance. It is currently implemented for APUs only.

Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108 ++++++++++++++++++++++
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
 5 files changed, 126 insertions(+), 4 deletions(-)
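
For context, the begin_cpu_access/end_cpu_access hooks added below are driven from userspace via DMA_BUF_IOCTL_SYNC. A minimal caller-side sketch, assuming a standard exported dma-buf fd and the Linux uapi headers (function and variable names here are illustrative, not part of this patch), looks roughly like this:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-buf.h>

/* Sketch only: bracket a CPU read of an exported dma-buf so that the
 * exporter's begin_cpu_access/end_cpu_access callbacks run. */
static int read_dmabuf(int dmabuf_fd, void *dst, size_t size)
{
	struct dma_buf_sync sync = { .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ };
	void *map = mmap(NULL, size, PROT_READ, MAP_SHARED, dmabuf_fd, 0);

	if (map == MAP_FAILED)
		return -1;

	/* Kernel invokes dma_buf_ops.begin_cpu_access() here. */
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

	memcpy(dst, map, size);

	/* Kernel invokes dma_buf_ops.end_cpu_access() here. */
	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ;
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

	munmap(map, size);
	return 0;
}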

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f8657c3..193db70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
 int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
 void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d704a45..b5483e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 	struct drm_device *dev = crtc->dev;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
 	struct amdgpu_framebuffer *old_amdgpu_fb;
 	struct amdgpu_framebuffer *new_amdgpu_fb;
 	struct drm_gem_object *obj;
@@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
 
 	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
 	if (unlikely(r != 0)) {
-		DRM_ERROR("failed to pin new abo buffer before flip\n");
-		goto unreserve;
+		/* latest APUs support gtt scan out */
+		if (gtt_scannable)
+			r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_GTT, &base);
+		if (unlikely(r != 0)) {
+			DRM_ERROR("failed to pin new abo buffer before flip\n");
+			goto unreserve;
+		}
 	}
 
 	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 31383e0..df30b08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = amdgpu_gem_prime_export,
-	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_pin = amdgpu_gem_prime_pin,
 	.gem_prime_unpin = amdgpu_gem_prime_unpin,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index ae9c106..9e1424d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -164,6 +164,82 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 	return bo->tbo.resv;
 }
 
+static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	long i, ret = 0;
+	unsigned old_count;
+	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
+	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
+	u32 domain;
+
+	if (!reads || !gtt_scannable)
+		return 0;
+
+	ret = amdgpu_bo_reserve(bo, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	/*
+	 * Wait for all shared fences to complete before we switch to future
+	 * use of exclusive fence on this prime shared bo.
+	 */
+	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
+						  MAX_SCHEDULE_TIMEOUT);
+
+	if (unlikely(ret < 0)) {
+		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
+		amdgpu_bo_unreserve(bo);
+		return ret;
+	}
+
+	ret = 0;
+	/* Pin to gtt */
+	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+		old_count = bo->pin_count;
+		for (i = 0; i < old_count; i++)
+			amdgpu_bo_unpin(bo);
+		for (i = 0; i < old_count; i++) {
+			ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+			if (unlikely(ret != 0))
+				break;
+		}
+	}
+	if (ret == 0)
+		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+
+	amdgpu_bo_unreserve(bo);
+	return ret;
+}
+
+static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+	int ret = 0;
+	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
+
+	if (!reads || !gtt_scannable)
+		return 0;
+
+	mb();
+	ret = amdgpu_bo_reserve(bo, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	amdgpu_bo_unpin(bo);
+
+	amdgpu_bo_unreserve(bo);
+
+	return 0;
+}
+
+static struct dma_buf_ops amdgpu_dmabuf_ops;
+static atomic_t aops_lock;
+
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags)
@@ -178,5 +254,37 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 	buf = drm_gem_prime_export(dev, gobj, flags);
 	if (!IS_ERR(buf))
 		buf->file->f_mapping = dev->anon_inode->i_mapping;
+
+	while (amdgpu_dmabuf_ops.begin_cpu_access != amdgpu_gem_begin_cpu_access ||
+		amdgpu_dmabuf_ops.end_cpu_access != amdgpu_gem_end_cpu_access )
+	{
+		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
+			amdgpu_dmabuf_ops = *(buf->ops);
+			amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
+			amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
+		}
+	}
+	buf->ops = &amdgpu_dmabuf_ops;
+
 	return buf;
 }
+
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *obj;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev) {
+			/*
+			 * Importing dmabuf exported from our own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	return drm_gem_prime_import(dev, dma_buf);
+ }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a3bf021..f25b830 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 	int r;
 	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
 	unsigned int awidth;
+	struct amdgpu_device *adev = plane->dev->dev_private;
+	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
 
 	dm_plane_state_old = to_dm_plane_state(plane->state);
 	dm_plane_state_new = to_dm_plane_state(new_state);
@@ -3005,7 +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
 		return r;
 
 	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address);
-
+	if (unlikely(r != 0)) {
+		/* latest APUs support gtt scan out */
+		if (gtt_scannable)
+			r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_GTT, &afb->address);
+	}
 
 	amdgpu_bo_unreserve(rbo);
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found] ` <1511914846-25292-1-git-send-email-Samuel.Li-5C7GfCeVMHo@public.gmane.org>
@ 2017-11-29  9:38   ` Christian König
       [not found]     ` <57016c36-d7d5-d099-9bfb-e66132037b80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
From: Christian König @ 2017-11-29  9:38 UTC (permalink / raw)
  To: Samuel Li, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 29.11.2017 at 01:20, Samuel Li wrote:
> To improve cpu read performance. This is implemented for APUs currently.

And once more a NAK for this approach.

What we could do is migrate the BO to GTT during mmap, but pinning it is
out of the question.

Regards,
Christian.
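
A rough sketch of that mmap-time migration idea, using the 2017-era helpers this patch already relies on (amdgpu_ttm_placement_from_domain() and ttm_bo_validate(), whose exact signature varies by kernel version), could look like the following; treat it as an untested illustration, not a proposed implementation:

/* Sketch only: move the BO to GTT for CPU access instead of pinning it. */
static int amdgpu_bo_migrate_to_gtt(struct amdgpu_bo *bo)
{
	int ret;

	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret != 0))
		return ret;

	/* Re-validate with a GTT-only placement; no pin count is taken,
	 * so TTM remains free to move the BO again later. */
	amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);

	amdgpu_bo_unreserve(bo);
	return ret;
}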

>
> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108 ++++++++++++++++++++++
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
>   5 files changed, 126 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index f8657c3..193db70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   					struct drm_gem_object *gobj,
>   					int flags);
> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> +					    struct dma_buf *dma_buf);
>   int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>   void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>   struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index d704a45..b5483e4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>   	struct drm_device *dev = crtc->dev;
>   	struct amdgpu_device *adev = dev->dev_private;
>   	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>   	struct amdgpu_framebuffer *old_amdgpu_fb;
>   	struct amdgpu_framebuffer *new_amdgpu_fb;
>   	struct drm_gem_object *obj;
> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>   
>   	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
>   	if (unlikely(r != 0)) {
> -		DRM_ERROR("failed to pin new abo buffer before flip\n");
> -		goto unreserve;
> +		/* latest APUs support gtt scan out */
> +		if (gtt_scannable)
> +			r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_GTT, &base);
> +		if (unlikely(r != 0)) {
> +			DRM_ERROR("failed to pin new abo buffer before flip\n");
> +			goto unreserve;
> +		}
>   	}
>   
>   	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 31383e0..df30b08 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
>   	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>   	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>   	.gem_prime_export = amdgpu_gem_prime_export,
> -	.gem_prime_import = drm_gem_prime_import,
> +	.gem_prime_import = amdgpu_gem_prime_import,
>   	.gem_prime_pin = amdgpu_gem_prime_pin,
>   	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>   	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index ae9c106..9e1424d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -164,6 +164,82 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>   	return bo->tbo.resv;
>   }
>   
> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> +{
> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	long i, ret = 0;
> +	unsigned old_count;
> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
> +	u32 domain;
> +
> +	if (!reads || !gtt_scannable)
> +		return 0;
> +
> +	ret = amdgpu_bo_reserve(bo, false);
> +	if (unlikely(ret != 0))
> +		return ret;
> +
> +	/*
> +	 * Wait for all shared fences to complete before we switch to future
> +	 * use of exclusive fence on this prime shared bo.
> +	 */
> +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
> +						  MAX_SCHEDULE_TIMEOUT);
> +
> +	if (unlikely(ret < 0)) {
> +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
> +		amdgpu_bo_unreserve(bo);
> +		return ret;
> +	}
> +
> +	ret = 0;
> +	/* Pin to gtt */
> +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> +		old_count = bo->pin_count;
> +		for (i = 0; i < old_count; i++)
> +			amdgpu_bo_unpin(bo);
> +		for (i = 0; i < old_count; i++) {
> +			ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
> +			if (unlikely(ret != 0))
> +				break;
> +		}
> +	}
> +	if (ret == 0)
> +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
> +
> +	amdgpu_bo_unreserve(bo);
> +	return ret;
> +}
> +
> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> +{
> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> +	int ret = 0;
> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
> +
> +	if (!reads || !gtt_scannable)
> +		return 0;
> +
> +	mb();
> +	ret = amdgpu_bo_reserve(bo, true);
> +	if (unlikely(ret != 0))
> +		return ret;
> +
> +	amdgpu_bo_unpin(bo);
> +
> +	amdgpu_bo_unreserve(bo);
> +
> +	return 0;
> +}
> +
> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> +static atomic_t aops_lock;
> +
>   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   					struct drm_gem_object *gobj,
>   					int flags)
> @@ -178,5 +254,37 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   	buf = drm_gem_prime_export(dev, gobj, flags);
>   	if (!IS_ERR(buf))
>   		buf->file->f_mapping = dev->anon_inode->i_mapping;
> +
> +	while (amdgpu_dmabuf_ops.begin_cpu_access != amdgpu_gem_begin_cpu_access ||
> +		amdgpu_dmabuf_ops.end_cpu_access != amdgpu_gem_end_cpu_access )
> +	{
> +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
> +			amdgpu_dmabuf_ops = *(buf->ops);
> +			amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
> +			amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
> +		}
> +	}
> +	buf->ops = &amdgpu_dmabuf_ops;
> +
>   	return buf;
>   }
> +
> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> +					    struct dma_buf *dma_buf)
> +{
> +	struct drm_gem_object *obj;
> +
> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> +		obj = dma_buf->priv;
> +		if (obj->dev == dev) {
> +			/*
> +			 * Importing dmabuf exported from out own gem increases
> +			 * refcount on gem itself instead of f_count of dmabuf.
> +			 */
> +			drm_gem_object_get(obj);
> +			return obj;
> +		}
> +	}
> +
> +	return drm_gem_prime_import(dev, dma_buf);
> + }
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index a3bf021..f25b830 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>   	int r;
>   	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
>   	unsigned int awidth;
> +	struct amdgpu_device *adev = plane->dev->dev_private;
> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>   
>   	dm_plane_state_old = to_dm_plane_state(plane->state);
>   	dm_plane_state_new = to_dm_plane_state(new_state);
> @@ -3005,7 +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>   		return r;
>   
>   	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address);
> -
> +	if (unlikely(r != 0)) {
> +		/* latest APUs support gtt scan out */
> +		if (gtt_scannable)
> +			r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_GTT, &afb->address);
> +	}
>   
>   	amdgpu_bo_unreserve(rbo);
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* RE: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found]     ` <57016c36-d7d5-d099-9bfb-e66132037b80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-11-29 14:48       ` Li, Samuel
       [not found]         ` <BY2PR12MB063022C560621FA4101DDE8CF53B0-K//h7OWB4q6KMKmDyTWPpAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
From: Li, Samuel @ 2017-11-29 14:48 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Actually it needs to be pinned here, since otherwise page flip will pin it into vram.

Sam


> -----Original Message-----
> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
> Sent: Wednesday, November 29, 2017 4:39 AM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma
> buf.
> 
> Am 29.11.2017 um 01:20 schrieb Samuel Li:
> > To improve cpu read performance. This is implemented for APUs currently.
> 
> And once more a NAK for this approach.
> 
> What we could do is migrate the BO to GTT during mmap, but pinning it is out
> of question.
> 
> Regards,
> Christian.
> 
> >
> > Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108
> ++++++++++++++++++++++
> >   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
> >   5 files changed, 126 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index f8657c3..193db70 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct
> drm_device *dev,
> >   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >   					struct drm_gem_object *gobj,
> >   					int flags);
> > +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device
> *dev,
> > +					    struct dma_buf *dma_buf);
> >   int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >   void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> >   struct reservation_object *amdgpu_gem_prime_res_obj(struct
> > drm_gem_object *); diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > index d704a45..b5483e4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> > @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc
> *crtc,
> >   	struct drm_device *dev = crtc->dev;
> >   	struct amdgpu_device *adev = dev->dev_private;
> >   	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
> > +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >flags
> > +& AMD_IS_APU);
> >   	struct amdgpu_framebuffer *old_amdgpu_fb;
> >   	struct amdgpu_framebuffer *new_amdgpu_fb;
> >   	struct drm_gem_object *obj;
> > @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc
> > *crtc,
> >
> >   	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM,
> &base);
> >   	if (unlikely(r != 0)) {
> > -		DRM_ERROR("failed to pin new abo buffer before flip\n");
> > -		goto unreserve;
> > +		/* latest APUs support gtt scan out */
> > +		if (gtt_scannable)
> > +			r = amdgpu_bo_pin(new_abo,
> AMDGPU_GEM_DOMAIN_GTT, &base);
> > +		if (unlikely(r != 0)) {
> > +			DRM_ERROR("failed to pin new abo buffer before
> flip\n");
> > +			goto unreserve;
> > +		}
> >   	}
> >
> >   	r = reservation_object_get_fences_rcu(new_abo->tbo.resv,
> > &work->excl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index 31383e0..df30b08 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
> >   	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >   	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >   	.gem_prime_export = amdgpu_gem_prime_export,
> > -	.gem_prime_import = drm_gem_prime_import,
> > +	.gem_prime_import = amdgpu_gem_prime_import,
> >   	.gem_prime_pin = amdgpu_gem_prime_pin,
> >   	.gem_prime_unpin = amdgpu_gem_prime_unpin,
> >   	.gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff --git
> > a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > index ae9c106..9e1424d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > @@ -164,6 +164,82 @@ struct reservation_object
> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >   	return bo->tbo.resv;
> >   }
> >
> > +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
> enum
> > +dma_data_direction direction) {
> > +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> > +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > +	long i, ret = 0;
> > +	unsigned old_count;
> > +	bool reads = (direction == DMA_BIDIRECTIONAL || direction ==
> DMA_FROM_DEVICE);
> > +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >flags & AMD_IS_APU);
> > +	u32 domain;
> > +
> > +	if (!reads || !gtt_scannable)
> > +		return 0;
> > +
> > +	ret = amdgpu_bo_reserve(bo, false);
> > +	if (unlikely(ret != 0))
> > +		return ret;
> > +
> > +	/*
> > +	 * Wait for all shared fences to complete before we switch to future
> > +	 * use of exclusive fence on this prime shared bo.
> > +	 */
> > +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
> > +						  MAX_SCHEDULE_TIMEOUT);
> > +
> > +	if (unlikely(ret < 0)) {
> > +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
> > +		amdgpu_bo_unreserve(bo);
> > +		return ret;
> > +	}
> > +
> > +	ret = 0;
> > +	/* Pin to gtt */
> > +	domain = amdgpu_mem_type_to_domain(bo-
> >tbo.mem.mem_type);
> > +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> > +		old_count = bo->pin_count;
> > +		for (i = 0; i < old_count; i++)
> > +			amdgpu_bo_unpin(bo);
> > +		for (i = 0; i < old_count; i++) {
> > +			ret = amdgpu_bo_pin(bo,
> AMDGPU_GEM_DOMAIN_GTT, NULL);
> > +			if (unlikely(ret != 0))
> > +				break;
> > +		}
> > +	}
> > +	if (ret == 0)
> > +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
> NULL);
> > +
> > +	amdgpu_bo_unreserve(bo);
> > +	return ret;
> > +}
> > +
> > +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
> enum
> > +dma_data_direction direction) {
> > +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> > +	int ret = 0;
> > +	bool reads = (direction == DMA_BIDIRECTIONAL || direction ==
> DMA_FROM_DEVICE);
> > +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >flags
> > +& AMD_IS_APU);
> > +
> > +	if (!reads || !gtt_scannable)
> > +		return 0;
> > +
> > +	mb();
> > +	ret = amdgpu_bo_reserve(bo, true);
> > +	if (unlikely(ret != 0))
> > +		return ret;
> > +
> > +	amdgpu_bo_unpin(bo);
> > +
> > +	amdgpu_bo_unreserve(bo);
> > +
> > +	return 0;
> > +}
> > +
> > +static struct dma_buf_ops amdgpu_dmabuf_ops; static atomic_t
> > +aops_lock;
> > +
> >   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >   					struct drm_gem_object *gobj,
> >   					int flags)
> > @@ -178,5 +254,37 @@ struct dma_buf
> *amdgpu_gem_prime_export(struct drm_device *dev,
> >   	buf = drm_gem_prime_export(dev, gobj, flags);
> >   	if (!IS_ERR(buf))
> >   		buf->file->f_mapping = dev->anon_inode->i_mapping;
> > +
> > +	while (amdgpu_dmabuf_ops.begin_cpu_access !=
> amdgpu_gem_begin_cpu_access ||
> > +		amdgpu_dmabuf_ops.end_cpu_access !=
> amdgpu_gem_end_cpu_access )
> > +	{
> > +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
> > +			amdgpu_dmabuf_ops = *(buf->ops);
> > +			amdgpu_dmabuf_ops.begin_cpu_access =
> amdgpu_gem_begin_cpu_access;
> > +			amdgpu_dmabuf_ops.end_cpu_access =
> amdgpu_gem_end_cpu_access;
> > +		}
> > +	}
> > +	buf->ops = &amdgpu_dmabuf_ops;
> > +
> >   	return buf;
> >   }
> > +
> > +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device
> *dev,
> > +					    struct dma_buf *dma_buf)
> > +{
> > +	struct drm_gem_object *obj;
> > +
> > +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> > +		obj = dma_buf->priv;
> > +		if (obj->dev == dev) {
> > +			/*
> > +			 * Importing dmabuf exported from out own gem
> increases
> > +			 * refcount on gem itself instead of f_count of
> dmabuf.
> > +			 */
> > +			drm_gem_object_get(obj);
> > +			return obj;
> > +		}
> > +	}
> > +
> > +	return drm_gem_prime_import(dev, dma_buf);  }
> > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > index a3bf021..f25b830 100644
> > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct
> drm_plane *plane,
> >   	int r;
> >   	struct dm_plane_state *dm_plane_state_new,
> *dm_plane_state_old;
> >   	unsigned int awidth;
> > +	struct amdgpu_device *adev = plane->dev->dev_private;
> > +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >flags
> > +& AMD_IS_APU);
> >
> >   	dm_plane_state_old = to_dm_plane_state(plane->state);
> >   	dm_plane_state_new = to_dm_plane_state(new_state); @@ -
> 3005,7
> > +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane
> *plane,
> >   		return r;
> >
> >   	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb-
> >address);
> > -
> > +	if (unlikely(r != 0)) {
> > +		/* latest APUs support gtt scan out */
> > +		if (gtt_scannable)
> > +			r = amdgpu_bo_pin(rbo,
> AMDGPU_GEM_DOMAIN_GTT, &afb->address);
> > +	}
> >
> >   	amdgpu_bo_unreserve(rbo);
> >

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found]         ` <BY2PR12MB063022C560621FA4101DDE8CF53B0-K//h7OWB4q6KMKmDyTWPpAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-11-29 14:54           ` Christian König
       [not found]             ` <6c99af44-efbe-a737-5021-2d1c242528f5-5C7GfCeVMHo@public.gmane.org>
From: Christian König @ 2017-11-29 14:54 UTC (permalink / raw)
  To: Li, Samuel, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

And that's exactly the reason why it is a no-go.

Scanning out from GTT isn't supported at the moment.

Christian.

On 29.11.2017 at 15:48, Li, Samuel wrote:
> Actually it needs to be pinned here, since otherwise page flip will pin it into vram.
>
> SAm
>
>
>> -----Original Message-----
>> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
>> Sent: Wednesday, November 29, 2017 4:39 AM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma
>> buf.
>>
>> Am 29.11.2017 um 01:20 schrieb Samuel Li:
>>> To improve cpu read performance. This is implemented for APUs currently.
>> And once more a NAK for this approach.
>>
>> What we could do is migrate the BO to GTT during mmap, but pinning it is out
>> of question.
>>
>> Regards,
>> Christian.
>>
>>> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108
>> ++++++++++++++++++++++
>>>    drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
>>>    5 files changed, 126 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index f8657c3..193db70 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct
>> drm_device *dev,
>>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>    					struct drm_gem_object *gobj,
>>>    					int flags);
>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device
>> *dev,
>>> +					    struct dma_buf *dma_buf);
>>>    int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>    void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>    struct reservation_object *amdgpu_gem_prime_res_obj(struct
>>> drm_gem_object *); diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>> index d704a45..b5483e4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc
>> *crtc,
>>>    	struct drm_device *dev = crtc->dev;
>>>    	struct amdgpu_device *adev = dev->dev_private;
>>>    	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
>>> flags
>>> +& AMD_IS_APU);
>>>    	struct amdgpu_framebuffer *old_amdgpu_fb;
>>>    	struct amdgpu_framebuffer *new_amdgpu_fb;
>>>    	struct drm_gem_object *obj;
>>> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc
>>> *crtc,
>>>
>>>    	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM,
>> &base);
>>>    	if (unlikely(r != 0)) {
>>> -		DRM_ERROR("failed to pin new abo buffer before flip\n");
>>> -		goto unreserve;
>>> +		/* latest APUs support gtt scan out */
>>> +		if (gtt_scannable)
>>> +			r = amdgpu_bo_pin(new_abo,
>> AMDGPU_GEM_DOMAIN_GTT, &base);
>>> +		if (unlikely(r != 0)) {
>>> +			DRM_ERROR("failed to pin new abo buffer before
>> flip\n");
>>> +			goto unreserve;
>>> +		}
>>>    	}
>>>
>>>    	r = reservation_object_get_fences_rcu(new_abo->tbo.resv,
>>> &work->excl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 31383e0..df30b08 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
>>>    	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>    	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>    	.gem_prime_export = amdgpu_gem_prime_export,
>>> -	.gem_prime_import = drm_gem_prime_import,
>>> +	.gem_prime_import = amdgpu_gem_prime_import,
>>>    	.gem_prime_pin = amdgpu_gem_prime_pin,
>>>    	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>    	.gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff --git
>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> index ae9c106..9e1424d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> @@ -164,6 +164,82 @@ struct reservation_object
>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>    	return bo->tbo.resv;
>>>    }
>>>
>>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
>> enum
>>> +dma_data_direction direction) {
>>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> +	long i, ret = 0;
>>> +	unsigned old_count;
>>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction ==
>> DMA_FROM_DEVICE);
>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
>>> flags & AMD_IS_APU);
>>> +	u32 domain;
>>> +
>>> +	if (!reads || !gtt_scannable)
>>> +		return 0;
>>> +
>>> +	ret = amdgpu_bo_reserve(bo, false);
>>> +	if (unlikely(ret != 0))
>>> +		return ret;
>>> +
>>> +	/*
>>> +	 * Wait for all shared fences to complete before we switch to future
>>> +	 * use of exclusive fence on this prime shared bo.
>>> +	 */
>>> +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
>>> +						  MAX_SCHEDULE_TIMEOUT);
>>> +
>>> +	if (unlikely(ret < 0)) {
>>> +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
>>> +		amdgpu_bo_unreserve(bo);
>>> +		return ret;
>>> +	}
>>> +
>>> +	ret = 0;
>>> +	/* Pin to gtt */
>>> +	domain = amdgpu_mem_type_to_domain(bo-
>>> tbo.mem.mem_type);
>>> +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>> +		old_count = bo->pin_count;
>>> +		for (i = 0; i < old_count; i++)
>>> +			amdgpu_bo_unpin(bo);
>>> +		for (i = 0; i < old_count; i++) {
>>> +			ret = amdgpu_bo_pin(bo,
>> AMDGPU_GEM_DOMAIN_GTT, NULL);
>>> +			if (unlikely(ret != 0))
>>> +				break;
>>> +		}
>>> +	}
>>> +	if (ret == 0)
>>> +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
>> NULL);
>>> +
>>> +	amdgpu_bo_unreserve(bo);
>>> +	return ret;
>>> +}
>>> +
>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
>> enum
>>> +dma_data_direction direction) {
>>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>>> +	int ret = 0;
>>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction ==
>> DMA_FROM_DEVICE);
>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
>>> flags
>>> +& AMD_IS_APU);
>>> +
>>> +	if (!reads || !gtt_scannable)
>>> +		return 0;
>>> +
>>> +	mb();
>>> +	ret = amdgpu_bo_reserve(bo, true);
>>> +	if (unlikely(ret != 0))
>>> +		return ret;
>>> +
>>> +	amdgpu_bo_unpin(bo);
>>> +
>>> +	amdgpu_bo_unreserve(bo);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static struct dma_buf_ops amdgpu_dmabuf_ops; static atomic_t
>>> +aops_lock;
>>> +
>>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>    					struct drm_gem_object *gobj,
>>>    					int flags)
>>> @@ -178,5 +254,37 @@ struct dma_buf
>> *amdgpu_gem_prime_export(struct drm_device *dev,
>>>    	buf = drm_gem_prime_export(dev, gobj, flags);
>>>    	if (!IS_ERR(buf))
>>>    		buf->file->f_mapping = dev->anon_inode->i_mapping;
>>> +
>>> +	while (amdgpu_dmabuf_ops.begin_cpu_access !=
>> amdgpu_gem_begin_cpu_access ||
>>> +		amdgpu_dmabuf_ops.end_cpu_access !=
>> amdgpu_gem_end_cpu_access )
>>> +	{
>>> +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
>>> +			amdgpu_dmabuf_ops = *(buf->ops);
>>> +			amdgpu_dmabuf_ops.begin_cpu_access =
>> amdgpu_gem_begin_cpu_access;
>>> +			amdgpu_dmabuf_ops.end_cpu_access =
>> amdgpu_gem_end_cpu_access;
>>> +		}
>>> +	}
>>> +	buf->ops = &amdgpu_dmabuf_ops;
>>> +
>>>    	return buf;
>>>    }
>>> +
>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device
>> *dev,
>>> +					    struct dma_buf *dma_buf)
>>> +{
>>> +	struct drm_gem_object *obj;
>>> +
>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>> +		obj = dma_buf->priv;
>>> +		if (obj->dev == dev) {
>>> +			/*
>>> +			 * Importing dmabuf exported from out own gem
>> increases
>>> +			 * refcount on gem itself instead of f_count of
>> dmabuf.
>>> +			 */
>>> +			drm_gem_object_get(obj);
>>> +			return obj;
>>> +		}
>>> +	}
>>> +
>>> +	return drm_gem_prime_import(dev, dma_buf);  }
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> index a3bf021..f25b830 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct
>> drm_plane *plane,
>>>    	int r;
>>>    	struct dm_plane_state *dm_plane_state_new,
>> *dm_plane_state_old;
>>>    	unsigned int awidth;
>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
>>> flags
>>> +& AMD_IS_APU);
>>>
>>>    	dm_plane_state_old = to_dm_plane_state(plane->state);
>>>    	dm_plane_state_new = to_dm_plane_state(new_state); @@ -
>> 3005,7
>>> +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane
>> *plane,
>>>    		return r;
>>>
>>>    	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb-
>>> address);
>>> -
>>> +	if (unlikely(r != 0)) {
>>> +		/* latest APUs support gtt scan out */
>>> +		if (gtt_scannable)
>>> +			r = amdgpu_bo_pin(rbo,
>> AMDGPU_GEM_DOMAIN_GTT, &afb->address);
>>> +	}
>>>
>>>    	amdgpu_bo_unreserve(rbo);
>>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* RE: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found]             ` <6c99af44-efbe-a737-5021-2d1c242528f5-5C7GfCeVMHo@public.gmane.org>
@ 2017-11-29 14:56               ` Li, Samuel
       [not found]                 ` <BY2PR12MB06309E0735541567ECF1E3D8F53B0-K//h7OWB4q6KMKmDyTWPpAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
From: Li, Samuel @ 2017-11-29 14:56 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

One major purpose of the ChromeOS mmap_test is to avoid buffer copying. What is the concern for scanning out from GTT on APUs?

Sam

> -----Original Message-----
> From: Koenig, Christian
> Sent: Wednesday, November 29, 2017 9:54 AM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma
> buf.
> 
> And exactly that's the reason why it is a no-go.
> 
> Scanning out from GTT isn't supported at the moment.
> 
> Christian.
> 
> Am 29.11.2017 um 15:48 schrieb Li, Samuel:
> > Actually it needs to be pinned here, since otherwise page flip will pin it into
> vram.
> >
> > SAm
> >
> >
> >> -----Original Message-----
> >> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
> >> Sent: Wednesday, November 29, 2017 4:39 AM
> >> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses
> >> dma buf.
> >>
> >> Am 29.11.2017 um 01:20 schrieb Samuel Li:
> >>> To improve cpu read performance. This is implemented for APUs
> currently.
> >> And once more a NAK for this approach.
> >>
> >> What we could do is migrate the BO to GTT during mmap, but pinning it
> >> is out of question.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
> >>> ---
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108
> >> ++++++++++++++++++++++
> >>>    drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
> >>>    5 files changed, 126 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index f8657c3..193db70 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct
> >> drm_device *dev,
> >>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>    					struct drm_gem_object *gobj,
> >>>    					int flags);
> >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> drm_device
> >> *dev,
> >>> +					    struct dma_buf *dma_buf);
> >>>    int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >>>    void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> >>>    struct reservation_object *amdgpu_gem_prime_res_obj(struct
> >>> drm_gem_object *); diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >>> index d704a45..b5483e4 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> >>> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct
> drm_crtc
> >> *crtc,
> >>>    	struct drm_device *dev = crtc->dev;
> >>>    	struct amdgpu_device *adev = dev->dev_private;
> >>>    	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
> >>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >>> flags
> >>> +& AMD_IS_APU);
> >>>    	struct amdgpu_framebuffer *old_amdgpu_fb;
> >>>    	struct amdgpu_framebuffer *new_amdgpu_fb;
> >>>    	struct drm_gem_object *obj;
> >>> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct
> >>> drm_crtc *crtc,
> >>>
> >>>    	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM,
> >> &base);
> >>>    	if (unlikely(r != 0)) {
> >>> -		DRM_ERROR("failed to pin new abo buffer before flip\n");
> >>> -		goto unreserve;
> >>> +		/* latest APUs support gtt scan out */
> >>> +		if (gtt_scannable)
> >>> +			r = amdgpu_bo_pin(new_abo,
> >> AMDGPU_GEM_DOMAIN_GTT, &base);
> >>> +		if (unlikely(r != 0)) {
> >>> +			DRM_ERROR("failed to pin new abo buffer before
> >> flip\n");
> >>> +			goto unreserve;
> >>> +		}
> >>>    	}
> >>>
> >>>    	r = reservation_object_get_fences_rcu(new_abo->tbo.resv,
> >>> &work->excl, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> index 31383e0..df30b08 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
> >>>    	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>    	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>    	.gem_prime_export = amdgpu_gem_prime_export,
> >>> -	.gem_prime_import = drm_gem_prime_import,
> >>> +	.gem_prime_import = amdgpu_gem_prime_import,
> >>>    	.gem_prime_pin = amdgpu_gem_prime_pin,
> >>>    	.gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>    	.gem_prime_res_obj = amdgpu_gem_prime_res_obj, diff --git
> >>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> index ae9c106..9e1424d 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> @@ -164,6 +164,82 @@ struct reservation_object
> >> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>    	return bo->tbo.resv;
> >>>    }
> >>>
> >>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
> >> enum
> >>> +dma_data_direction direction) {
> >>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> >>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >>> +	long i, ret = 0;
> >>> +	unsigned old_count;
> >>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction ==
> >> DMA_FROM_DEVICE);
> >>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >>> flags & AMD_IS_APU);
> >>> +	u32 domain;
> >>> +
> >>> +	if (!reads || !gtt_scannable)
> >>> +		return 0;
> >>> +
> >>> +	ret = amdgpu_bo_reserve(bo, false);
> >>> +	if (unlikely(ret != 0))
> >>> +		return ret;
> >>> +
> >>> +	/*
> >>> +	 * Wait for all shared fences to complete before we switch to future
> >>> +	 * use of exclusive fence on this prime shared bo.
> >>> +	 */
> >>> +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
> >>> +						  MAX_SCHEDULE_TIMEOUT);
> >>> +
> >>> +	if (unlikely(ret < 0)) {
> >>> +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
> >>> +		amdgpu_bo_unreserve(bo);
> >>> +		return ret;
> >>> +	}
> >>> +
> >>> +	ret = 0;
> >>> +	/* Pin to gtt */
> >>> +	domain = amdgpu_mem_type_to_domain(bo-
> >>> tbo.mem.mem_type);
> >>> +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> >>> +		old_count = bo->pin_count;
> >>> +		for (i = 0; i < old_count; i++)
> >>> +			amdgpu_bo_unpin(bo);
> >>> +		for (i = 0; i < old_count; i++) {
> >>> +			ret = amdgpu_bo_pin(bo,
> >> AMDGPU_GEM_DOMAIN_GTT, NULL);
> >>> +			if (unlikely(ret != 0))
> >>> +				break;
> >>> +		}
> >>> +	}
> >>> +	if (ret == 0)
> >>> +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
> >> NULL);
> >>> +
> >>> +	amdgpu_bo_unreserve(bo);
> >>> +	return ret;
> >>> +}
> >>> +
> >>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
> >> enum
> >>> +dma_data_direction direction) {
> >>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
> >>> +	int ret = 0;
> >>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction ==
> >> DMA_FROM_DEVICE);
> >>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >>> flags
> >>> +& AMD_IS_APU);
> >>> +
> >>> +	if (!reads || !gtt_scannable)
> >>> +		return 0;
> >>> +
> >>> +	mb();
> >>> +	ret = amdgpu_bo_reserve(bo, true);
> >>> +	if (unlikely(ret != 0))
> >>> +		return ret;
> >>> +
> >>> +	amdgpu_bo_unpin(bo);
> >>> +
> >>> +	amdgpu_bo_unreserve(bo);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static struct dma_buf_ops amdgpu_dmabuf_ops; static atomic_t
> >>> +aops_lock;
> >>> +
> >>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>    					struct drm_gem_object *gobj,
> >>>    					int flags)
> >>> @@ -178,5 +254,37 @@ struct dma_buf
> >> *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>    	buf = drm_gem_prime_export(dev, gobj, flags);
> >>>    	if (!IS_ERR(buf))
> >>>    		buf->file->f_mapping = dev->anon_inode->i_mapping;
> >>> +
> >>> +	while (amdgpu_dmabuf_ops.begin_cpu_access !=
> >> amdgpu_gem_begin_cpu_access ||
> >>> +		amdgpu_dmabuf_ops.end_cpu_access !=
> >> amdgpu_gem_end_cpu_access )
> >>> +	{
> >>> +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
> >>> +			amdgpu_dmabuf_ops = *(buf->ops);
> >>> +			amdgpu_dmabuf_ops.begin_cpu_access =
> >> amdgpu_gem_begin_cpu_access;
> >>> +			amdgpu_dmabuf_ops.end_cpu_access =
> >> amdgpu_gem_end_cpu_access;
> >>> +		}
> >>> +	}
> >>> +	buf->ops = &amdgpu_dmabuf_ops;
> >>> +
> >>>    	return buf;
> >>>    }
> >>> +
> >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> drm_device
> >> *dev,
> >>> +					    struct dma_buf *dma_buf)
> >>> +{
> >>> +	struct drm_gem_object *obj;
> >>> +
> >>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>> +		obj = dma_buf->priv;
> >>> +		if (obj->dev == dev) {
> >>> +			/*
> >>> +			 * Importing dmabuf exported from out own gem
> >> increases
> >>> +			 * refcount on gem itself instead of f_count of
> >> dmabuf.
> >>> +			 */
> >>> +			drm_gem_object_get(obj);
> >>> +			return obj;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	return drm_gem_prime_import(dev, dma_buf);  }
> >>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> index a3bf021..f25b830 100644
> >>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> >>> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct
> >> drm_plane *plane,
> >>>    	int r;
> >>>    	struct dm_plane_state *dm_plane_state_new,
> >> *dm_plane_state_old;
> >>>    	unsigned int awidth;
> >>> +	struct amdgpu_device *adev = plane->dev->dev_private;
> >>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev-
> >>> flags
> >>> +& AMD_IS_APU);
> >>>
> >>>    	dm_plane_state_old = to_dm_plane_state(plane->state);
> >>>    	dm_plane_state_new = to_dm_plane_state(new_state); @@ -
> >> 3005,7
> >>> +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane
> >> *plane,
> >>>    		return r;
> >>>
> >>>    	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb-
> address);
> >>> -
> >>> +	if (unlikely(r != 0)) {
> >>> +		/* latest APUs support gtt scan out */
> >>> +		if (gtt_scannable)
> >>> +			r = amdgpu_bo_pin(rbo,
> >> AMDGPU_GEM_DOMAIN_GTT, &afb->address);
> >>> +	}
> >>>
> >>>    	amdgpu_bo_unreserve(rbo);
> >>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


* Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found]                 ` <BY2PR12MB06309E0735541567ECF1E3D8F53B0-K//h7OWB4q6KMKmDyTWPpAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-11-29 15:01                   ` Christian König
       [not found]                     ` <66363160-0778-d484-b91d-5f056b78b5c6-5C7GfCeVMHo@public.gmane.org>
From: Christian König @ 2017-11-29 15:01 UTC (permalink / raw)
  To: Li, Samuel, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> What is the concern for scanning out from GTT on APUs?
It's simply not implemented yet. It needs quite a bit of additional
setup in DC for this to work.

I sent out a WIP branch for this a few weeks ago, but haven't
worked on it in a while. BTW, it is only supported on Carrizo and Raven.

But even then pinning a BO to GTT for this would still be a no-go. We 
could just avoid the copy on scanout when the BO is already inside GTT 
because of the CPU access.

In general we should rather work on this as Michel described and avoid 
creating the BO in VRAM in the first place if possible.

Regards,
Christian.
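
As a hedged illustration of that "no copy when the BO is already in GTT" idea, using the same 2017-era helpers the patch itself calls (so names and exact behaviour are assumptions), the flip path could pin the BO in whatever domain it currently occupies instead of forcing VRAM. The caller would use this where the patch currently pins to VRAM, with the BO already reserved:

/* Sketch only: pin for scanout in the BO's current domain, falling
 * back to VRAM when GTT scanout is not possible on this ASIC. */
static int amdgpu_flip_pin_in_place(struct amdgpu_bo *abo, u64 *base,
				    bool gtt_scannable)
{
	u32 domain = amdgpu_mem_type_to_domain(abo->tbo.mem.mem_type);

	if (domain != AMDGPU_GEM_DOMAIN_GTT || !gtt_scannable)
		domain = AMDGPU_GEM_DOMAIN_VRAM;

	return amdgpu_bo_pin(abo, domain, base);
}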

On 29.11.2017 at 15:56, Li, Samuel wrote:
> One major purpose of the ChromeOS mmap_test is to avoid buffer copying. What is the concern for scanning out from GTT on APUs?
>
> Sam
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Wednesday, November 29, 2017 9:54 AM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma
>> buf.
>>
>> And exactly that's the reason why it is a no-go.
>>
>> Scanning out from GTT isn't supported at the moment.
>>
>> Christian.
>>
>> Am 29.11.2017 um 15:48 schrieb Li, Samuel:
>>> Actually it needs to be pinned here, since otherwise page flip will pin it into
>> vram.
>>> SAm
>>>
>>>
>>>> -----Original Message-----
>>>> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
>>>> Sent: Wednesday, November 29, 2017 4:39 AM
>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses
>>>> dma buf.
>>>>
>>>> Am 29.11.2017 um 01:20 schrieb Samuel Li:
>>>>> To improve cpu read performance. This is implemented for APUs
>> currently.
>>>> And once more a NAK for this approach.
>>>>
>>>> What we could do is migrate the BO to GTT during mmap, but pinning it
>>>> is out of question.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> Change-Id: I300a7daed8f2b0ba6be71a43196a6b8617ddc62e
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu.h               |   2 +
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_display.c       |  10 +-
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c           |   2 +-
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c         | 108
>>>> ++++++++++++++++++++++
>>>>>     drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |   8 +-
>>>>>     5 files changed, 126 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index f8657c3..193db70 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -417,6 +417,8 @@ amdgpu_gem_prime_import_sg_table(struct
>>>> drm_device *dev,
>>>>>     struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>     					struct drm_gem_object *gobj,
>>>>>     					int flags);
>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>> drm_device
>>>> *dev,
>>>>> +					    struct dma_buf *dma_buf);
>>>>>     int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>>>     void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>>>     struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>>>> index d704a45..b5483e4 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
>>>>> @@ -147,6 +147,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>>>>>     	struct drm_device *dev = crtc->dev;
>>>>>     	struct amdgpu_device *adev = dev->dev_private;
>>>>>     	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>>     	struct amdgpu_framebuffer *old_amdgpu_fb;
>>>>>     	struct amdgpu_framebuffer *new_amdgpu_fb;
>>>>>     	struct drm_gem_object *obj;
>>>>> @@ -190,8 +191,13 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
>>>>>
>>>>>     	r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
>>>>>     	if (unlikely(r != 0)) {
>>>>> -		DRM_ERROR("failed to pin new abo buffer before flip\n");
>>>>> -		goto unreserve;
>>>>> +		/* latest APUs support gtt scan out */
>>>>> +		if (gtt_scannable)
>>>>> +			r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_GTT, &base);
>>>>> +		if (unlikely(r != 0)) {
>>>>> +			DRM_ERROR("failed to pin new abo buffer before flip\n");
>>>>> +			goto unreserve;
>>>>> +		}
>>>>>     	}
>>>>>
>>>>>     	r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> index 31383e0..df30b08 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> @@ -868,7 +868,7 @@ static struct drm_driver kms_driver = {
>>>>>     	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>     	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>     	.gem_prime_export = amdgpu_gem_prime_export,
>>>>> -	.gem_prime_import = drm_gem_prime_import,
>>>>> +	.gem_prime_import = amdgpu_gem_prime_import,
>>>>>     	.gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>     	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>     	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> index ae9c106..9e1424d 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> @@ -164,6 +164,82 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>     	return bo->tbo.resv;
>>>>>     }
>>>>>
>>>>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>>>>> +{
>>>>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>>>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +	long i, ret = 0;
>>>>> +	unsigned old_count;
>>>>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>> +	u32 domain;
>>>>> +
>>>>> +	if (!reads || !gtt_scannable)
>>>>> +		return 0;
>>>>> +
>>>>> +	ret = amdgpu_bo_reserve(bo, false);
>>>>> +	if (unlikely(ret != 0))
>>>>> +		return ret;
>>>>> +
>>>>> +	/*
>>>>> +	 * Wait for all shared fences to complete before we switch to future
>>>>> +	 * use of exclusive fence on this prime shared bo.
>>>>> +	 */
>>>>> +	ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
>>>>> +						  MAX_SCHEDULE_TIMEOUT);
>>>>> +
>>>>> +	if (unlikely(ret < 0)) {
>>>>> +		DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
>>>>> +		amdgpu_bo_unreserve(bo);
>>>>> +		return ret;
>>>>> +	}
>>>>> +
>>>>> +	ret = 0;
>>>>> +	/* Pin to gtt */
>>>>> +	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>>>>> +	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>>>>> +		old_count = bo->pin_count;
>>>>> +		for (i = 0; i < old_count; i++)
>>>>> +			amdgpu_bo_unpin(bo);
>>>>> +		for (i = 0; i < old_count; i++) {
>>>>> +			ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
>>>>> +			if (unlikely(ret != 0))
>>>>> +				break;
>>>>> +		}
>>>>> +	}
>>>>> +	if (ret == 0)
>>>>> +		ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
>>>>> +
>>>>> +	amdgpu_bo_unreserve(bo);
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>>>>> +{
>>>>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>>>>> +	int ret = 0;
>>>>> +	bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE);
>>>>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>> +
>>>>> +	if (!reads || !gtt_scannable)
>>>>> +		return 0;
>>>>> +
>>>>> +	mb();
>>>>> +	ret = amdgpu_bo_reserve(bo, true);
>>>>> +	if (unlikely(ret != 0))
>>>>> +		return ret;
>>>>> +
>>>>> +	amdgpu_bo_unpin(bo);
>>>>> +
>>>>> +	amdgpu_bo_unreserve(bo);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>> +static atomic_t aops_lock;
>>>>> +
>>>>>     struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>     					struct drm_gem_object *gobj,
>>>>>     					int flags)
>>>>> @@ -178,5 +254,37 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>     	buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>     	if (!IS_ERR(buf))
>>>>>     		buf->file->f_mapping = dev->anon_inode->i_mapping;
>>>>> +
>>>>> +	while (amdgpu_dmabuf_ops.begin_cpu_access != amdgpu_gem_begin_cpu_access ||
>>>>> +		amdgpu_dmabuf_ops.end_cpu_access != amdgpu_gem_end_cpu_access )
>>>>> +	{
>>>>> +		if (!atomic_cmpxchg(&aops_lock, 0, 1)) {
>>>>> +			amdgpu_dmabuf_ops = *(buf->ops);
>>>>> +			amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
>>>>> +			amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
>>>>> +		}
>>>>> +	}
>>>>> +	buf->ops = &amdgpu_dmabuf_ops;
>>>>> +
>>>>>     	return buf;
>>>>>     }
>>>>> +
>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>>>>> +					    struct dma_buf *dma_buf)
>>>>> +{
>>>>> +	struct drm_gem_object *obj;
>>>>> +
>>>>> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>> +		obj = dma_buf->priv;
>>>>> +		if (obj->dev == dev) {
>>>>> +			/*
>>>>> +			 * Importing dmabuf exported from out own gem increases
>>>>> +			 * refcount on gem itself instead of f_count of dmabuf.
>>>>> +			 */
>>>>> +			drm_gem_object_get(obj);
>>>>> +			return obj;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +	return drm_gem_prime_import(dev, dma_buf);
>>>>> +}
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> index a3bf021..f25b830 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> @@ -2987,6 +2987,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>>>>     	int r;
>>>>>     	struct dm_plane_state *dm_plane_state_new, *dm_plane_state_old;
>>>>>     	unsigned int awidth;
>>>>> +	struct amdgpu_device *adev = plane->dev->dev_private;
>>>>> +	bool gtt_scannable = (adev->asic_type >= CHIP_CARRIZO && adev->flags & AMD_IS_APU);
>>>>>
>>>>>     	dm_plane_state_old = to_dm_plane_state(plane->state);
>>>>>     	dm_plane_state_new = to_dm_plane_state(new_state);
>>>>> @@ -3005,7 +3007,11 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane,
>>>>>     		return r;
>>>>>
>>>>>     	r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &afb->address);
>>>>> -
>>>>> +	if (unlikely(r != 0)) {
>>>>> +		/* latest APUs support gtt scan out */
>>>>> +		if (gtt_scannable)
>>>>> +			r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_GTT, &afb->address);
>>>>> +	}
>>>>>
>>>>>     	amdgpu_bo_unreserve(rbo);
>>>>>


* RE: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found]                     ` <66363160-0778-d484-b91d-5f056b78b5c6-5C7GfCeVMHo@public.gmane.org>
@ 2017-11-29 16:52                       ` Li, Samuel
       [not found]                         ` <BY2PR12MB06301C4BA16E0E00F23591F7F53B0-K//h7OWB4q6KMKmDyTWPpAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Li, Samuel @ 2017-11-29 16:52 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

To explain the operations a little bit: the test repeatedly runs the following sequences,

        const int sequences[4][4] = {
                { STEP_MMAP, STEP_FAULT, STEP_FLIP, STEP_DRAW },
                { STEP_MMAP, STEP_FLIP, STEP_DRAW, STEP_SKIP },
                { STEP_MMAP, STEP_DRAW, STEP_FLIP, STEP_SKIP },
                { STEP_FLIP, STEP_MMAP, STEP_DRAW, STEP_SKIP },
        };

Here STEP_MMAP includes prime_mmap() and begin_cpu_access(), and STEP_DRAW does repeated CPU reads/writes.
It looks to me like the dma_buf has to be pinned to GTT here, to prevent it from being pinned back into VRAM by STEP_FLIP before drawing.
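
For reference, a rough userspace sketch of what a STEP_MMAP/STEP_DRAW pair could look like against the generic dma-buf uapi (this is not the actual ChromeOS test code; step_mmap()/step_draw() and the dmabuf_fd/size parameters are illustrative only):

#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

/* STEP_MMAP: map the exported dma-buf and start CPU access. */
static void *step_mmap(int dmabuf_fd, size_t size)
{
	struct dma_buf_sync sync = {
		.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW,
	};
	void *ptr;

	/* prime_mmap(): CPU mapping of the dma-buf fd */
	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
	if (ptr == MAP_FAILED)
		return NULL;

	/* begin_cpu_access(): DMA_BUF_IOCTL_SYNC(START) ends up in the
	 * exporter's .begin_cpu_access hook, i.e. the code added by this patch. */
	ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);
	return ptr;
}

/* STEP_DRAW: repeated CPU reads and writes through the mapping. */
static void step_draw(uint8_t *ptr, size_t size)
{
	uint8_t sum = 0;
	size_t i;

	for (i = 0; i < size; i++) {
		sum += ptr[i];	/* CPU read */
		ptr[i] = sum;	/* CPU write */
	}
}

If a flip moves the BO back to VRAM between step_mmap() and step_draw(), those CPU reads go through the slow VRAM path again, which is what the GTT pin above is meant to prevent.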

> I've sent out a WIP branch for this a few weeks ago, but haven't worked on
> this in a while. BTW it is only supported on Carrizo and Raven.
Can you show me the branch? Looks like we need to get it finished.

Sam




> -----Original Message-----
> From: Koenig, Christian
> Sent: Wednesday, November 29, 2017 10:01 AM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma
> buf.
> 
> > What is the concern for scanning out from GTT on APUs?
> It's simply not implemented yet. You need quite a bunch of different setup in
> DC for this to work.
> 
> I've sent out a WIP branch for this a few weeks ago, but haven't worked on
> this in a while. BTW it is only supported on Carrizo and Raven.
> 
> But even then pinning a BO to GTT for this would still be a no-go. We could
> just avoid the copy on scanout when the BO is already inside GTT because of
> the CPU access.
> 
> In general we should rather work on this as Michel described and avoid
> creating the BO in VRAM in the first place if possible.
> 
> Regards,
> Christian.
> 
> On 29.11.2017 at 15:56, Li, Samuel wrote:
> > One major purpose of the ChromeOS mmap_test is to avoid buffer copying. What is the concern for scanning out from GTT on APUs?
> >
> > Sam
> >
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Wednesday, November 29, 2017 9:54 AM
> >> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses
> >> dma buf.
> >>
> >> And exactly that's the reason why it is a no-go.
> >>
> >> Scanning out from GTT isn't supported at the moment.
> >>
> >> Christian.
> >>
> >> On 29.11.2017 at 15:48, Li, Samuel wrote:
> >>> Actually it needs to be pinned here, since otherwise page flip will
> >>> pin it into vram.
> >>> Sam
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
> >>>> Sent: Wednesday, November 29, 2017 4:39 AM
> >>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >>>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses
> >>>> dma buf.
> >>>>
> >>>> On 29.11.2017 at 01:20, Samuel Li wrote:
> >>>>> To improve cpu read performance. This is implemented for APUs currently.
> >>>> And once more a NAK for this approach.
> >>>>
> >>>> What we could do is migrate the BO to GTT during mmap, but pinning
> >>>> it is out of question.
> >>>>
> >>>> Regards,
> >>>> Christian.


* Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma buf.
       [not found]                         ` <BY2PR12MB06301C4BA16E0E00F23591F7F53B0-K//h7OWB4q6KMKmDyTWPpAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-11-29 17:16                           ` Christian König
  0 siblings, 0 replies; 8+ messages in thread
From: Christian König @ 2017-11-29 17:16 UTC (permalink / raw)
  To: Li, Samuel, Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 29.11.2017 at 17:52, Li, Samuel wrote:
> To explain the operations a little bit: the test repeatedly runs the following sequences,
>
>         const int sequences[4][4] = {
>                 { STEP_MMAP, STEP_FAULT, STEP_FLIP, STEP_DRAW },
>                 { STEP_MMAP, STEP_FLIP, STEP_DRAW, STEP_SKIP },
>                 { STEP_MMAP, STEP_DRAW, STEP_FLIP, STEP_SKIP },
>                 { STEP_FLIP, STEP_MMAP, STEP_DRAW, STEP_SKIP },
>         };
>
> Here STEP_MMAP includes prime_mmap() and begin_cpu_access(), and STEP_DRAW does repeated CPU reads/writes.
> It looks to me like the dma_buf has to be pinned to GTT here, to prevent it from being pinned back into VRAM by STEP_FLIP before drawing.

When STEP_FLIP needs the BO in VRAM, it would fail because we have pinned
it to GTT while it is mapped, and that is not something we can do.

What we can do is migrate the BO into GTT when we mmap it and migrate it 
back to VRAM on demand when it is pinned.

When scanning out from GTT is implemented, the flip will then
automatically pin it to GTT instead of VRAM.
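
For illustration, a minimal sketch of that idea, i.e. migrating without pinning (this is not the WIP branch; the amdgpu_ttm_placement_from_domain()/ttm_bo_validate() calls and their exact signatures are assumptions based on the current code):

/*
 * Instead of pinning in begin_cpu_access(), only move the BO into GTT.
 * Nothing is pinned, so a later flip is still free to migrate the BO
 * back to VRAM, or to scan out from GTT directly once DC supports it.
 */
static int amdgpu_gem_begin_cpu_access_sketch(struct dma_buf *dma_buf,
					      enum dma_data_direction direction)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
	int ret;

	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret != 0))
		return ret;

	/* Only migrate while nobody else has pinned the BO. */
	if (!bo->pin_count) {
		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	}

	amdgpu_bo_unreserve(bo);
	return ret;
}

The same migration could just as well be done from the mmap path; the point is that the placement change is only a hint, not a pin, so the flip code keeps full control over where the BO ends up.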

>> I've sent out a WIP branch for this a few weeks ago, but haven't worked on
>> this in a while. BTW it is only supported on Carrizo and Raven.
> Can you show me the branch? Looks like we need to get it finished.

The amdgpu part was already done and is also already committed IIRC 
(that was only a bunch of bug fixes anyway).

What's missing is the DC part; it needs to do some extra bandwidth
calculation when scanning out from GTT is enabled, and that was
problematic to always activate.

You need to ping Harry about the status there.

Christian.

>
> Sam
>
>
>
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Wednesday, November 29, 2017 10:01 AM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses dma
>> buf.
>>
>>> What is the concern for scanning out from GTT on APUs?
>> It's simply not implemented yet. You need quite a bunch of different setup in
>> DC for this to work.
>>
>> I've sent out a WIP branch for this a few weeks ago, but haven't worked on
>> this in a while. BTW it is only supported on Carrizo and Raven.
>>
>> But even then pinning a BO to GTT for this would still be a no-go. We could
>> just avoid the copy on scanout when the BO is already inside GTT because of
>> the CPU access.
>>
>> In general we should rather work on this as Michel described and avoid
>> creating the BO in VRAM in the first place if possible.
>>
>> Regards,
>> Christian.
>>
>> On 29.11.2017 at 15:56, Li, Samuel wrote:
>>> One major purpose of the ChromeOS mmap_test is to avoid buffer copying. What is the concern for scanning out from GTT on APUs?
>>> Sam
>>>
>>>> -----Original Message-----
>>>> From: Koenig, Christian
>>>> Sent: Wednesday, November 29, 2017 9:54 AM
>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses
>>>> dma buf.
>>>>
>>>> And exactly that's the reason why it is a no-go.
>>>>
>>>> Scanning out from GTT isn't supported at the moment.
>>>>
>>>> Christian.
>>>>
>>>> On 29.11.2017 at 15:48, Li, Samuel wrote:
>>>>> Actually it needs to be pinned here, since otherwise page flip will
>>>>> pin it into vram.
>>>>> Sam
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
>>>>>> Sent: Wednesday, November 29, 2017 4:39 AM
>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>> Subject: Re: [PATCH 1/1] drm/amdgpu: Pin to gtt before cpu accesses
>>>>>> dma buf.
>>>>>>
>>>>>> On 29.11.2017 at 01:20, Samuel Li wrote:
>>>>>>> To improve cpu read performance. This is implemented for APUs currently.
>>>>>> And once more a NAK for this approach.
>>>>>>
>>>>>> What we could do is migrate the BO to GTT during mmap, but pinning
>>>>>> it is out of question.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.

