All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
@ 2017-09-13 22:39 Samuel Li
       [not found] ` <1505342358-21512-1-git-send-email-Samuel.Li-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Samuel Li @ 2017-09-13 22:39 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Samuel Li

v2: drop hdp invalidate/flush.

Signed-off-by: Samuel Li <Samuel.Li@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77 ++++++++++++++++++++++++++++++-
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d2aaad7..188b705 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf);
 int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
 void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
 struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
 void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
 
 /* sub-allocation manager, it has to be protected by another lock.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 2cdf844..9b63ac5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = amdgpu_gem_prime_export,
-	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_pin = amdgpu_gem_prime_pin,
 	.gem_prime_unpin = amdgpu_gem_prime_unpin,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
@@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
 	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = amdgpu_gem_prime_vmap,
 	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
+	.gem_prime_mmap = amdgpu_gem_prime_mmap,
 
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 5b3f928..13c977a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
 	ttm_bo_kunmap(&bo->dma_buf_vmap);
 }
 
+int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	unsigned asize = amdgpu_bo_size(bo);
+	int ret;
+
+	if (!vma->vm_file)
+		return -ENODEV;
+
+	if (adev == NULL)
+		return -ENODEV;
+
+	/* Check for valid size. */
+	if (asize < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
+	    (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
+		return -EPERM;
+	}
+	vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
+
+	/* prime mmap does not need to check access, so allow here */
+	ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
+	drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
+
+	return ret;
+}
+
 struct drm_gem_object *
 amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 				 struct dma_buf_attachment *attach,
@@ -130,14 +164,55 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
 	return bo->tbo.resv;
 }
 
+static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	return amdgpu_gem_prime_pin(dma_buf->priv);
+}
+
+static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+	amdgpu_gem_prime_unpin(dma_buf->priv);
+
+	return 0;
+}
+
+static struct dma_buf_ops amdgpu_dmabuf_ops;
+
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags)
 {
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+	struct dma_buf *dma_buf;
 
 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
 		return ERR_PTR(-EPERM);
 
-	return drm_gem_prime_export(dev, gobj, flags);
+	dma_buf = drm_gem_prime_export(dev, gobj, flags);
+	amdgpu_dmabuf_ops = *(dma_buf->ops);
+	amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
+	amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
+	dma_buf->ops = &amdgpu_dmabuf_ops;
+
+	return dma_buf;
+}
+
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+					    struct dma_buf *dma_buf)
+{
+	struct drm_gem_object *obj;
+
+	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+		obj = dma_buf->priv;
+		if (obj->dev == dev) {
+			/*
+			 * Importing dmabuf exported from our own gem increases
+			 * refcount on gem itself instead of f_count of dmabuf.
+			 */
+			drm_gem_object_get(obj);
+			return obj;
+		}
+	}
+
+	return drm_gem_prime_import(dev, dma_buf);
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found] ` <1505342358-21512-1-git-send-email-Samuel.Li-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-15 15:05   ` Christian König
       [not found]     ` <f68eaecb-ee43-78ba-2609-2ff6bb53ac50-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-09-15 15:05 UTC (permalink / raw)
  To: Samuel Li, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 14.09.2017 um 00:39 schrieb Samuel Li:
> v2: drop hdp invalidate/flush.
>
> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77 ++++++++++++++++++++++++++++++-
>   3 files changed, 81 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d2aaad7..188b705 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   					struct drm_gem_object *gobj,
>   					int flags);
> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> +					    struct dma_buf *dma_buf);
>   int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>   void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>   struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
>   void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>   void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
>   int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>   
>   /* sub-allocation manager, it has to be protected by another lock.
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 2cdf844..9b63ac5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>   	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>   	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>   	.gem_prime_export = amdgpu_gem_prime_export,
> -	.gem_prime_import = drm_gem_prime_import,
> +	.gem_prime_import = amdgpu_gem_prime_import,
>   	.gem_prime_pin = amdgpu_gem_prime_pin,
>   	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>   	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
> @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
>   	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
>   	.gem_prime_vmap = amdgpu_gem_prime_vmap,
>   	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> +	.gem_prime_mmap = amdgpu_gem_prime_mmap,
>   
>   	.name = DRIVER_NAME,
>   	.desc = DRIVER_DESC,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> index 5b3f928..13c977a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
>   	ttm_bo_kunmap(&bo->dma_buf_vmap);
>   }
>   
> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
> +{
> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +	unsigned asize = amdgpu_bo_size(bo);
> +	int ret;
> +
> +	if (!vma->vm_file)
> +		return -ENODEV;
> +
> +	if (adev == NULL)
> +		return -ENODEV;
> +
> +	/* Check for valid size. */
> +	if (asize < vma->vm_end - vma->vm_start)
> +		return -EINVAL;
> +
> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> +	    (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> +		return -EPERM;
> +	}
> +	vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;

Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> 
PAGE_SHIFT;", but I'm not sure.

How other drivers handle this?

> +
> +	/* prime mmap does not need to check access, so allow here */
> +	ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
> +	if (ret)
> +		return ret;
> +
> +	ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
> +	drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
> +
> +	return ret;
> +}
> +
>   struct drm_gem_object *
>   amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>   				 struct dma_buf_attachment *attach,
> @@ -130,14 +164,55 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>   	return bo->tbo.resv;
>   }
>   
> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> +{
> +	return amdgpu_gem_prime_pin(dma_buf->priv);
> +}
> +
> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
> +{
> +	amdgpu_gem_prime_unpin(dma_buf->priv);
> +
> +	return 0;
> +}
> +
> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> +
>   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>   					struct drm_gem_object *gobj,
>   					int flags)
>   {
>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> +	struct dma_buf *dma_buf;
>   
>   	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>   		return ERR_PTR(-EPERM);
>   
> -	return drm_gem_prime_export(dev, gobj, flags);
> +	dma_buf = drm_gem_prime_export(dev, gobj, flags);
> +	amdgpu_dmabuf_ops = *(dma_buf->ops);
> +	amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
> +	amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
> +	dma_buf->ops = &amdgpu_dmabuf_ops;

This isn't race free and needs to be fixed.

Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().

Alternatively you could just completely drop amdgpu_gem_begin_cpu_access() 
and amdgpu_gem_end_cpu_access() as well.

When the buffer is shared between devices it is pinned anyway and when it 
isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.

Regards,
Christian.

> +
> +	return dma_buf;
> +}
> +
> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
> +					    struct dma_buf *dma_buf)
> +{
> +	struct drm_gem_object *obj;
> +
> +	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> +		obj = dma_buf->priv;
> +		if (obj->dev == dev) {
> +			/*
>> +             * Importing dmabuf exported from our own gem increases
> +			 * refcount on gem itself instead of f_count of dmabuf.
> +			 */
> +			drm_gem_object_get(obj);
> +			return obj;
> +		}
> +	}
> +
> +	return drm_gem_prime_import(dev, dma_buf);
>   }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]     ` <f68eaecb-ee43-78ba-2609-2ff6bb53ac50-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-09-19 21:22       ` Samuel Li
       [not found]         ` <b1064525-5b36-1c12-4da6-aa4a805caaec-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Samuel Li @ 2017-09-19 21:22 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;", but I'm not sure.
> How other drivers handle this?
This is a good catch. Looks like pgoff is honored during prime mmap, not a fake offset here.

>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> This isn't race free and needs to be fixed.
> Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().
What do you mean "This isn't race free"?

Regards,
Sam



On 2017-09-15 11:05 AM, Christian König wrote:
> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>> v2: drop hdp invalidate/flush.
>>
>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77 ++++++++++++++++++++++++++++++-
>>   3 files changed, 81 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index d2aaad7..188b705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>                       struct drm_gem_object *gobj,
>>                       int flags);
>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>> +                        struct dma_buf *dma_buf);
>>   int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>   void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>   struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
>>   void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>>   void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
>>   int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>     /* sub-allocation manager, it has to be protected by another lock.
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 2cdf844..9b63ac5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>       .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>       .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>       .gem_prime_export = amdgpu_gem_prime_export,
>> -    .gem_prime_import = drm_gem_prime_import,
>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>       .gem_prime_pin = amdgpu_gem_prime_pin,
>>       .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>       .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
>> @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
>>       .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
>>       .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>       .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>         .name = DRIVER_NAME,
>>       .desc = DRIVER_DESC,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..13c977a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
>>       ttm_bo_kunmap(&bo->dma_buf_vmap);
>>   }
>>   +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>> +{
>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +    unsigned asize = amdgpu_bo_size(bo);
>> +    int ret;
>> +
>> +    if (!vma->vm_file)
>> +        return -ENODEV;
>> +
>> +    if (adev == NULL)
>> +        return -ENODEV;
>> +
>> +    /* Check for valid size. */
>> +    if (asize < vma->vm_end - vma->vm_start)
>> +        return -EINVAL;
>> +
>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>> +        return -EPERM;
>> +    }
>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> 
> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;", but I'm not sure.
> 
> How other drivers handle this?
> 
>> +
>> +    /* prime mmap does not need to check access, so allow here */
>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
>> +    drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
>> +
>> +    return ret;
>> +}
>> +
>>   struct drm_gem_object *
>>   amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>                    struct dma_buf_attachment *attach,
>> @@ -130,14 +164,55 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>       return bo->tbo.resv;
>>   }
>>   +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>> +{
>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>> +}
>> +
>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>> +{
>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>> +
>> +    return 0;
>> +}
>> +
>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>> +
>>   struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>                       struct drm_gem_object *gobj,
>>                       int flags)
>>   {
>>       struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>> +    struct dma_buf *dma_buf;
>>         if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>           return ERR_PTR(-EPERM);
>>   -    return drm_gem_prime_export(dev, gobj, flags);
>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>> +    amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
>> +    amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> 
> This isn't race free and needs to be fixed.
> 
> Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().
> 
> Alternatively you could just completely drop amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access() as well.
> 
> When the buffer is shared between devices it is pinned anyway and when it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
> 
> Regards,
> Christian.
> 
>> +
>> +    return dma_buf;
>> +}
>> +
>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>> +                        struct dma_buf *dma_buf)
>> +{
>> +    struct drm_gem_object *obj;
>> +
>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>> +        obj = dma_buf->priv;
>> +        if (obj->dev == dev) {
>> +            /*
>> +             * Importing dmabuf exported from our own gem increases
>> +             * refcount on gem itself instead of f_count of dmabuf.
>> +             */
>> +            drm_gem_object_get(obj);
>> +            return obj;
>> +        }
>> +    }
>> +
>> +    return drm_gem_prime_import(dev, dma_buf);
>>   }
> 
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]         ` <b1064525-5b36-1c12-4da6-aa4a805caaec-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-20  6:57           ` Christian König
       [not found]             ` <f0a22ddb-d747-6d93-adf3-428fd10f0ab5-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-09-20  6:57 UTC (permalink / raw)
  To: Samuel Li, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> What do you mean "This isn't race free"?

Take a look at the code again:
> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> +    amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
> +    amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
> +    dma_buf->ops = &amdgpu_dmabuf_ops;

What happens when another thread is using amdgpu_dmabuf_ops to call 
begin_cpu_access/end_cpu_access when you are fixing it up again?

I would just completely drop the two callbacks, pinning is not necessary 
for CPU access and thinking more about it, it actually has some unwanted 
side effects.

Regards,
Christian.

Am 19.09.2017 um 23:22 schrieb Samuel Li:
>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;", but I'm not sure.
>> How other drivers handle this?
> This is a good catch. Looks like pgoff is honored during prime mmap, not a fake offset here.
>
>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>> This isn't race free and needs to be fixed.
>> Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().
> What do you mean "This isn't race free"?
>
> Regards,
> Sam
>
>
>
> On 2017-09-15 11:05 AM, Christian König wrote:
>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>>> v2: drop hdp invalidate/flush.
>>>
>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77 ++++++++++++++++++++++++++++++-
>>>    3 files changed, 81 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index d2aaad7..188b705 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>                        struct drm_gem_object *gobj,
>>>                        int flags);
>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>>> +                        struct dma_buf *dma_buf);
>>>    int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>    void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>    struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
>>>    void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>>>    void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
>>>    int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>>      /* sub-allocation manager, it has to be protected by another lock.
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 2cdf844..9b63ac5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>>        .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>        .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>        .gem_prime_export = amdgpu_gem_prime_export,
>>> -    .gem_prime_import = drm_gem_prime_import,
>>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>>        .gem_prime_pin = amdgpu_gem_prime_pin,
>>>        .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>        .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
>>> @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
>>>        .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
>>>        .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>>        .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>>          .name = DRIVER_NAME,
>>>        .desc = DRIVER_DESC,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> index 5b3f928..13c977a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
>>>        ttm_bo_kunmap(&bo->dma_buf_vmap);
>>>    }
>>>    +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>>> +{
>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>> +    unsigned asize = amdgpu_bo_size(bo);
>>> +    int ret;
>>> +
>>> +    if (!vma->vm_file)
>>> +        return -ENODEV;
>>> +
>>> +    if (adev == NULL)
>>> +        return -ENODEV;
>>> +
>>> +    /* Check for valid size. */
>>> +    if (asize < vma->vm_end - vma->vm_start)
>>> +        return -EINVAL;
>>> +
>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>>> +        return -EPERM;
>>> +    }
>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;", but I'm not sure.
>>
>> How other drivers handle this?
>>
>>> +
>>> +    /* prime mmap does not need to check access, so allow here */
>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
>>> +    drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>    struct drm_gem_object *
>>>    amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>>                     struct dma_buf_attachment *attach,
>>> @@ -130,14 +164,55 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>        return bo->tbo.resv;
>>>    }
>>>    +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>>> +{
>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>>> +}
>>> +
>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>>> +{
>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>> +
>>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>                        struct drm_gem_object *gobj,
>>>                        int flags)
>>>    {
>>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>> +    struct dma_buf *dma_buf;
>>>          if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>            return ERR_PTR(-EPERM);
>>>    -    return drm_gem_prime_export(dev, gobj, flags);
>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>> +    amdgpu_dmabuf_ops.begin_cpu_access = amdgpu_gem_begin_cpu_access;
>>> +    amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>> This isn't race free and needs to be fixed.
>>
>> Better add callbacks to drm_prime.c similar to drm_gem_dmabuf_mmap().
>>
>> Alternatively you could just completely drop amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access() as well.
>>
>> When the buffer is shared between devices it is pinned anyway and when it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
>>
>> Regards,
>> Christian.
>>
>>> +
>>> +    return dma_buf;
>>> +}
>>> +
>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>>> +                        struct dma_buf *dma_buf)
>>> +{
>>> +    struct drm_gem_object *obj;
>>> +
>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>> +        obj = dma_buf->priv;
>>> +        if (obj->dev == dev) {
>>> +            /*
>>> +             * Importing dmabuf exported from our own gem increases
>>> +             * refcount on gem itself instead of f_count of dmabuf.
>>> +             */
>>> +            drm_gem_object_get(obj);
>>> +            return obj;
>>> +        }
>>> +    }
>>> +
>>> +    return drm_gem_prime_import(dev, dma_buf);
>>>    }
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]             ` <f0a22ddb-d747-6d93-adf3-428fd10f0ab5-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-20 15:44               ` Li, Samuel
       [not found]                 ` <BLUPR12MB06283E58EC4B6434D71DCF50F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Li, Samuel @ 2017-09-20 15:44 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> What happens when another thread is using amdgpu_dmabuf_ops to call
> begin_cpu_access/end_cpu_access when you are fixing it up again?
Right, that is an issue.

> I would just completely drop the two callbacks, pinning is not necessary for
> CPU access and thinking more about it it actually has some unwanted side
> effects.
CPU access needs synchronization anyway, so the two callbacks cannot be dropped (other drivers implement them too); I would like to keep them there for now.

Sam


> -----Original Message-----
> From: Koenig, Christian
> Sent: Wednesday, September 20, 2017 2:58 AM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> 
> > What do you mean "This isn't race free"?
> 
> Take a look at the code again:
> > +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> > +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> > +    amdgpu_dmabuf_ops.begin_cpu_access =
> amdgpu_gem_begin_cpu_access;
> > +    amdgpu_dmabuf_ops.end_cpu_access =
> amdgpu_gem_end_cpu_access;
> > +    dma_buf->ops = &amdgpu_dmabuf_ops;
> 
> What happens when another thread is using amdgpu_dmabuf_ops to call
> begin_cpu_access/end_cpu_access when you are fixing it up again?
> 
> I would just completely drop the two callbacks, pinning is not necessary for
> CPU access and thinking more about it it actually has some unwanted side
> effects.
> 
> Regards,
> Christian.
> 
> Am 19.09.2017 um 23:22 schrieb Samuel Li:
> >>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> >> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >>
> PAGE_SHIFT;", but I'm not sure.
> >> How other drivers handle this?
> > This is a good catch. Looks like pgoff is honored during prime mmap, not a
> fake offset here.
> >
> >>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >> This isn't race free and needs to be fixed.
> >> Better add callbacks to drm_prime.c similar to
> drm_gem_dmabuf_mmap().
> > What do you mean "This isn't race free"?
> >
> > Regards,
> > Sam
> >
> >
> >
> > On 2017-09-15 11:05 AM, Christian König wrote:
> >> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> >>> v2: drop hdp invalidate/flush.
> >>>
> >>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> >>> ---
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
> ++++++++++++++++++++++++++++++-
> >>>    3 files changed, 81 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> index d2aaad7..188b705 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct
> drm_device *dev,
> >>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>                        struct drm_gem_object *gobj,
> >>>                        int flags);
> >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> drm_device *dev,
> >>> +                        struct dma_buf *dma_buf);
> >>>    int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >>>    void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> >>>    struct reservation_object *amdgpu_gem_prime_res_obj(struct
> drm_gem_object *);
> >>>    void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
> >>>    void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void
> >>> *vaddr);
> >>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
> >>> +vm_area_struct *vma);
> >>>    int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
> >>>      /* sub-allocation manager, it has to be protected by another lock.
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> index 2cdf844..9b63ac5 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> >>>        .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>        .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>        .gem_prime_export = amdgpu_gem_prime_export,
> >>> -    .gem_prime_import = drm_gem_prime_import,
> >>> +    .gem_prime_import = amdgpu_gem_prime_import,
> >>>        .gem_prime_pin = amdgpu_gem_prime_pin,
> >>>        .gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>        .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@ -843,6
> >>> +843,7 @@ static struct drm_driver kms_driver = {
> >>>        .gem_prime_import_sg_table =
> amdgpu_gem_prime_import_sg_table,
> >>>        .gem_prime_vmap = amdgpu_gem_prime_vmap,
> >>>        .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> >>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
> >>>          .name = DRIVER_NAME,
> >>>        .desc = DRIVER_DESC,
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> index 5b3f928..13c977a 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
> drm_gem_object *obj, void *vaddr)
> >>>        ttm_bo_kunmap(&bo->dma_buf_vmap);
> >>>    }
> >>>    +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
> >>> vm_area_struct *vma)
> >>> +{
> >>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> >>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >>> +    unsigned asize = amdgpu_bo_size(bo);
> >>> +    int ret;
> >>> +
> >>> +    if (!vma->vm_file)
> >>> +        return -ENODEV;
> >>> +
> >>> +    if (adev == NULL)
> >>> +        return -ENODEV;
> >>> +
> >>> +    /* Check for valid size. */
> >>> +    if (asize < vma->vm_end - vma->vm_start)
> >>> +        return -EINVAL;
> >>> +
> >>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> >>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> >>> +        return -EPERM;
> >>> +    }
> >>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> >> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >>
> PAGE_SHIFT;", but I'm not sure.
> >>
> >> How other drivers handle this?
> >>
> >>> +
> >>> +    /* prime mmap does not need to check access, so allow here */
> >>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
> >private_data);
> >>> +    if (ret)
> >>> +        return ret;
> >>> +
> >>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
> >>> +    drm_vma_node_revoke(&obj->vma_node,
> >>> + vma->vm_file->private_data);
> >>> +
> >>> +    return ret;
> >>> +}
> >>> +
> >>>    struct drm_gem_object *
> >>>    amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> >>>                     struct dma_buf_attachment *attach, @@ -130,14
> >>> +164,55 @@ struct reservation_object
> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>        return bo->tbo.resv;
> >>>    }
> >>>    +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
> >>> enum dma_data_direction direction)
> >>> +{
> >>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
> >>> +}
> >>> +
> >>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
> enum
> >>> +dma_data_direction direction) {
> >>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> >>> +
> >>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
> >>>                        struct drm_gem_object *gobj,
> >>>                        int flags)
> >>>    {
> >>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> >>> +    struct dma_buf *dma_buf;
> >>>          if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> >>>            return ERR_PTR(-EPERM);
> >>>    -    return drm_gem_prime_export(dev, gobj, flags);
> >>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> amdgpu_gem_begin_cpu_access;
> >>> +    amdgpu_dmabuf_ops.end_cpu_access =
> amdgpu_gem_end_cpu_access;
> >>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >> This isn't race free and needs to be fixed.
> >>
> >> Better add callbacks to drm_prime.c similar to
> drm_gem_dmabuf_mmap().
> >>
> >> Alternative you could just completely drop
> amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access() as
> well.
> >>
> >> When the buffer is shared between device it is pinned anyway and when
> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> +
> >>> +    return dma_buf;
> >>> +}
> >>> +
> >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> drm_device *dev,
> >>> +                        struct dma_buf *dma_buf) {
> >>> +    struct drm_gem_object *obj;
> >>> +
> >>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>> +        obj = dma_buf->priv;
> >>> +        if (obj->dev == dev) {
> >>> +            /*
> >>> +             * Importing dmabuf exported from out own gem increases
> >>> +             * refcount on gem itself instead of f_count of dmabuf.
> >>> +             */
> >>> +            drm_gem_object_get(obj);
> >>> +            return obj;
> >>> +        }
> >>> +    }
> >>> +
> >>> +    return drm_gem_prime_import(dev, dma_buf);
> >>>    }
> >>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                 ` <BLUPR12MB06283E58EC4B6434D71DCF50F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-09-20 16:20                   ` Christian König
       [not found]                     ` <ddf0fe6c-4de7-ae03-a61f-30e3cc1cf315-5C7GfCeVMHo@public.gmane.org>
  2017-09-21  6:25                   ` Deucher, Alexander
  1 sibling, 1 reply; 15+ messages in thread
From: Christian König @ 2017-09-20 16:20 UTC (permalink / raw)
  To: Li, Samuel, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 20.09.2017 um 17:44 schrieb Li, Samuel:
>> What happens when another thread is using amdgpu_dmabuf_ops to call
>> begin_cpu_access/end_cpu_access when you are fixing it up again?
> Right, that is an issue.

A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be able to 
deal with that.

>
>> I would just completely drop the two callbacks, pinning is not necessary for
>> CPU access and thinking more about it it actually has some unwanted side
>> effects.
> CPU access needs synchronization anyway, so the two callbacks cannot be dropped (other drivers implemented too), so I would like to keep it there for now.

Wait a second, what do you mean by "CPU access needs synchronization"?

At least i915 makes the memory GPU inaccessible when you start to use it 
with the CPU.

If that is what this callback should do then this implementation would 
be incorrect. Pinning doesn't wait for any GPU operation to finish.

Regards,
Christian.

>
> Sam
>
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Wednesday, September 20, 2017 2:58 AM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
>>
>>> What do you mean "This isn't race free"?
>> Take a look at the code again:
>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>> amdgpu_gem_begin_cpu_access;
>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>> amdgpu_gem_end_cpu_access;
>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>> What happens when another thread is using amdgpu_dmabuf_ops to call
>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>
>> I would just completely drop the two callbacks, pinning is not necessary for
>> CPU access and thinking more about it it actually has some unwanted side
>> effects.
>>
>> Regards,
>> Christian.
>>
>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>>>> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >>
>> PAGE_SHIFT;", but I'm not sure.
>>>> How other drivers handle this?
>>> This is a good catch. Looks like pgoff is honored during prime mmap, not a
>> fake offset here.
>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>> This isn't race free and needs to be fixed.
>>>> Better add callbacks to drm_prime.c similar to
>> drm_gem_dmabuf_mmap().
>>> What do you mean "This isn't race free"?
>>>
>>> Regards,
>>> Sam
>>>
>>>
>>>
>>> On 2017-09-15 11:05 AM, Christian König wrote:
>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>>>>> v2: drop hdp invalidate/flush.
>>>>>
>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
>> ++++++++++++++++++++++++++++++-
>>>>>     3 files changed, 81 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index d2aaad7..188b705 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct
>> drm_device *dev,
>>>>>     struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>                         struct drm_gem_object *gobj,
>>>>>                         int flags);
>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>> drm_device *dev,
>>>>> +                        struct dma_buf *dma_buf);
>>>>>     int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>>>     void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>>>     struct reservation_object *amdgpu_gem_prime_res_obj(struct
>> drm_gem_object *);
>>>>>     void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>>>>>     void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void
>>>>> *vaddr);
>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
>>>>> +vm_area_struct *vma);
>>>>>     int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>>>>       /* sub-allocation manager, it has to be protected by another lock.
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> index 2cdf844..9b63ac5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>>>>         .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>         .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>         .gem_prime_export = amdgpu_gem_prime_export,
>>>>> -    .gem_prime_import = drm_gem_prime_import,
>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>>>>         .gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>         .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>         .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@ -843,6
>>>>> +843,7 @@ static struct drm_driver kms_driver = {
>>>>>         .gem_prime_import_sg_table =
>> amdgpu_gem_prime_import_sg_table,
>>>>>         .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>>>>         .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>>>>           .name = DRIVER_NAME,
>>>>>         .desc = DRIVER_DESC,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> index 5b3f928..13c977a 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
>> drm_gem_object *obj, void *vaddr)
>>>>>         ttm_bo_kunmap(&bo->dma_buf_vmap);
>>>>>     }
>>>>>     +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
>>>>> vm_area_struct *vma)
>>>>> +{
>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>> +    unsigned asize = amdgpu_bo_size(bo);
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!vma->vm_file)
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    if (adev == NULL)
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    /* Check for valid size. */
>>>>> +    if (asize < vma->vm_end - vma->vm_start)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>>>>> +        return -EPERM;
>>>>> +    }
>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>>>> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >>
>> PAGE_SHIFT;", but I'm not sure.
>>>> How other drivers handle this?
>>>>
>>>>> +
>>>>> +    /* prime mmap does not need to check access, so allow here */
>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
>>> private_data);
>>>>> +    if (ret)
>>>>> +        return ret;
>>>>> +
>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
>>>>> +    drm_vma_node_revoke(&obj->vma_node,
>>>>> + vma->vm_file->private_data);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>>     struct drm_gem_object *
>>>>>     amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>>>>                      struct dma_buf_attachment *attach, @@ -130,14
>>>>> +164,55 @@ struct reservation_object
>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>         return bo->tbo.resv;
>>>>>     }
>>>>>     +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
>>>>> enum dma_data_direction direction)
>>>>> +{
>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>>>>> +}
>>>>> +
>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
>> enum
>>>>> +dma_data_direction direction) {
>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>> +
>>>>>     struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>>>>                         struct drm_gem_object *gobj,
>>>>>                         int flags)
>>>>>     {
>>>>>         struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>> +    struct dma_buf *dma_buf;
>>>>>           if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>>             return ERR_PTR(-EPERM);
>>>>>     -    return drm_gem_prime_export(dev, gobj, flags);
>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>> amdgpu_gem_begin_cpu_access;
>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>> amdgpu_gem_end_cpu_access;
>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>> This isn't race free and needs to be fixed.
>>>>
>>>> Better add callbacks to drm_prime.c similar to
>> drm_gem_dmabuf_mmap().
>>>> Alternative you could just completely drop
>> amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access() as
>> well.
>>>> When the buffer is shared between device it is pinned anyway and when
>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
>>>> Regards,
>>>> Christian.
>>>>
>>>>> +
>>>>> +    return dma_buf;
>>>>> +}
>>>>> +
>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>> drm_device *dev,
>>>>> +                        struct dma_buf *dma_buf) {
>>>>> +    struct drm_gem_object *obj;
>>>>> +
>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>> +        obj = dma_buf->priv;
>>>>> +        if (obj->dev == dev) {
>>>>> +            /*
>>>>> +             * Importing dmabuf exported from out own gem increases
>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
>>>>> +             */
>>>>> +            drm_gem_object_get(obj);
>>>>> +            return obj;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    return drm_gem_prime_import(dev, dma_buf);
>>>>>     }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                     ` <ddf0fe6c-4de7-ae03-a61f-30e3cc1cf315-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-20 17:34                       ` Li, Samuel
       [not found]                         ` <BLUPR12MB06286EB4DD86B34C84F124EAF5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Li, Samuel @ 2017-09-20 17:34 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> If that is what this callback should do then this implementation would be
> incorrect. Pinning doesn't wait for any GPU operation to finish.

During pinning, it will wait for all the fences to finish. That shall be OK.

Sam



> -----Original Message-----
> From: Koenig, Christian
> Sent: Wednesday, September 20, 2017 12:21 PM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> 
> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
> >> What happens when another thread is using amdgpu_dmabuf_ops to call
> >> begin_cpu_access/end_cpu_access when you are fixing it up again?
> > Right, that is an issue.
> 
> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be able to
> deal with that.
> 
> >
> >> I would just completely drop the two callbacks, pinning is not
> >> necessary for CPU access and thinking more about it it actually has
> >> some unwanted side effects.
> > CPU access needs synchronization anyway, so the two callbacks cannot be
> dropped (other drivers implemented too), so I would like to keep it there for
> now.
> 
> Wait a second what do you mean with "CPU access needs synchronization"?
> 
> At least i915 makes the memory GPU inaccessible when you start to use it
> with the CPU.
> 
> If that is what this callback should do then this implementation would be
> incorrect. Pinning doesn't wait for any GPU operation to finish.
> 
> Regards,
> Christian.
> 
> >
> > Sam
> >
> >
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Wednesday, September 20, 2017 2:58 AM
> >> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> support
> >>
> >>> What do you mean "This isn't race free"?
> >> Take a look at the code again:
> >>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >> amdgpu_gem_begin_cpu_access;
> >>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >> amdgpu_gem_end_cpu_access;
> >>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >> What happens when another thread is using amdgpu_dmabuf_ops to call
> >> begin_cpu_access/end_cpu_access when you are fixing it up again?
> >>
> >> I would just completely drop the two callbacks, pinning is not
> >> necessary for CPU access and thinking more about it it actually has
> >> some unwanted side effects.
> >>
> >> Regards,
> >> Christian.
> >>
> >> Am 19.09.2017 um 23:22 schrieb Samuel Li:
> >>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> >>>> Maybe better use "vma->vm_pgoff +=
> amdgpu_bo_mmap_offset(bo) >>
> >> PAGE_SHIFT;", but I'm not sure.
> >>>> How other drivers handle this?
> >>> This is a good catch. Looks like pgoff is honored during prime mmap,
> >>> not a
> >> fake offset here.
> >>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>> This isn't race free and needs to be fixed.
> >>>> Better add callbacks to drm_prime.c similar to
> >> drm_gem_dmabuf_mmap().
> >>> What do you mean "This isn't race free"?
> >>>
> >>> Regards,
> >>> Sam
> >>>
> >>>
> >>>
> >>> On 2017-09-15 11:05 AM, Christian König wrote:
> >>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> >>>>> v2: drop hdp invalidate/flush.
> >>>>>
> >>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> >>>>> ---
> >>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> >>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> >>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
> >> ++++++++++++++++++++++++++++++-
> >>>>>     3 files changed, 81 insertions(+), 2 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> index d2aaad7..188b705 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>> @@ -395,11 +395,14 @@
> amdgpu_gem_prime_import_sg_table(struct
> >> drm_device *dev,
> >>>>>     struct dma_buf *amdgpu_gem_prime_export(struct drm_device
> *dev,
> >>>>>                         struct drm_gem_object *gobj,
> >>>>>                         int flags);
> >>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> >> drm_device *dev,
> >>>>> +                        struct dma_buf *dma_buf);
> >>>>>     int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >>>>>     void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> >>>>>     struct reservation_object *amdgpu_gem_prime_res_obj(struct
> >> drm_gem_object *);
> >>>>>     void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
> >>>>>     void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj,
> void
> >>>>> *vaddr);
> >>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
> >>>>> +vm_area_struct *vma);
> >>>>>     int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
> >>>>>       /* sub-allocation manager, it has to be protected by another lock.
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> index 2cdf844..9b63ac5 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> >>>>>         .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>>>         .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>>>         .gem_prime_export = amdgpu_gem_prime_export,
> >>>>> -    .gem_prime_import = drm_gem_prime_import,
> >>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
> >>>>>         .gem_prime_pin = amdgpu_gem_prime_pin,
> >>>>>         .gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>>>         .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@ -843,6
> >>>>> +843,7 @@ static struct drm_driver kms_driver = {
> >>>>>         .gem_prime_import_sg_table =
> >> amdgpu_gem_prime_import_sg_table,
> >>>>>         .gem_prime_vmap = amdgpu_gem_prime_vmap,
> >>>>>         .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> >>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
> >>>>>           .name = DRIVER_NAME,
> >>>>>         .desc = DRIVER_DESC,
> >>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> index 5b3f928..13c977a 100644
> >>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
> >> drm_gem_object *obj, void *vaddr)
> >>>>>         ttm_bo_kunmap(&bo->dma_buf_vmap);
> >>>>>     }
> >>>>>     +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
> struct
> >>>>> vm_area_struct *vma)
> >>>>> +{
> >>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> >>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> >>>>> +    unsigned asize = amdgpu_bo_size(bo);
> >>>>> +    int ret;
> >>>>> +
> >>>>> +    if (!vma->vm_file)
> >>>>> +        return -ENODEV;
> >>>>> +
> >>>>> +    if (adev == NULL)
> >>>>> +        return -ENODEV;
> >>>>> +
> >>>>> +    /* Check for valid size. */
> >>>>> +    if (asize < vma->vm_end - vma->vm_start)
> >>>>> +        return -EINVAL;
> >>>>> +
> >>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> >>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> >>>>> +        return -EPERM;
> >>>>> +    }
> >>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> >>>> Maybe better use "vma->vm_pgoff +=
> amdgpu_bo_mmap_offset(bo) >>
> >> PAGE_SHIFT;", but I'm not sure.
> >>>> How other drivers handle this?
> >>>>
> >>>>> +
> >>>>> +    /* prime mmap does not need to check access, so allow here */
> >>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
> >>> private_data);
> >>>>> +    if (ret)
> >>>>> +        return ret;
> >>>>> +
> >>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
> >>>>> +    drm_vma_node_revoke(&obj->vma_node,
> >>>>> + vma->vm_file->private_data);
> >>>>> +
> >>>>> +    return ret;
> >>>>> +}
> >>>>> +
> >>>>>     struct drm_gem_object *
> >>>>>     amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> >>>>>                      struct dma_buf_attachment *attach, @@ -130,14
> >>>>> +164,55 @@ struct reservation_object
> >> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>>>         return bo->tbo.resv;
> >>>>>     }
> >>>>>     +static int amdgpu_gem_begin_cpu_access(struct dma_buf
> >>>>> *dma_buf, enum dma_data_direction direction)
> >>>>> +{
> >>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
> >>>>> +}
> >>>>> +
> >>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
> >> enum
> >>>>> +dma_data_direction direction) {
> >>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
> >>>>> +
> >>>>> +    return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> >>>>> +
> >>>>>     struct dma_buf *amdgpu_gem_prime_export(struct drm_device
> *dev,
> >>>>>                         struct drm_gem_object *gobj,
> >>>>>                         int flags)
> >>>>>     {
> >>>>>         struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> >>>>> +    struct dma_buf *dma_buf;
> >>>>>           if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> >>>>>             return ERR_PTR(-EPERM);
> >>>>>     -    return drm_gem_prime_export(dev, gobj, flags);
> >>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >> amdgpu_gem_begin_cpu_access;
> >>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >> amdgpu_gem_end_cpu_access;
> >>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>> This isn't race free and needs to be fixed.
> >>>>
> >>>> Better add callbacks to drm_prime.c similar to
> >> drm_gem_dmabuf_mmap().
> >>>> Alternative you could just completely drop
> >> amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access()
> as
> >> well.
> >>>> When the buffer is shared between device it is pinned anyway and
> >>>> when
> >> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>> +
> >>>>> +    return dma_buf;
> >>>>> +}
> >>>>> +
> >>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> >> drm_device *dev,
> >>>>> +                        struct dma_buf *dma_buf) {
> >>>>> +    struct drm_gem_object *obj;
> >>>>> +
> >>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>>>> +        obj = dma_buf->priv;
> >>>>> +        if (obj->dev == dev) {
> >>>>> +            /*
> >>>>> +             * Importing dmabuf exported from out own gem increases
> >>>>> +             * refcount on gem itself instead of f_count of dmabuf.
> >>>>> +             */
> >>>>> +            drm_gem_object_get(obj);
> >>>>> +            return obj;
> >>>>> +        }
> >>>>> +    }
> >>>>> +
> >>>>> +    return drm_gem_prime_import(dev, dma_buf);
> >>>>>     }
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                         ` <BLUPR12MB06286EB4DD86B34C84F124EAF5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-09-20 17:38                           ` Christian König
       [not found]                             ` <70db5b39-546e-ac15-635e-dad73420e8bc-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-09-20 17:38 UTC (permalink / raw)
  To: Li, Samuel, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 20.09.2017 um 19:34 schrieb Li, Samuel:
>> If that is what this callback should do then this implementation would be
>> incorrect. Pinning doesn't wait for any GPU operation to finish.
> During pinning, it will wait for all the fences to finish. That shall be OK.

No that isn't correct. Pinning just notes that the BO shouldn't be moved 
any more.

It doesn't wait for anything.

Christian.

>
> Sam
>
>
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Wednesday, September 20, 2017 12:21 PM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
>>
>> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
>>>> What happens when another thread is using amdgpu_dmabuf_ops to call
>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>> Right, that is an issue.
>> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be able to
>> deal with that.
>>
>>>> I would just completely drop the two callbacks, pinning is not
>>>> necessary for CPU access and thinking more about it it actually has
>>>> some unwanted side effects.
>>> CPU access needs synchronization anyway, so the two callbacks cannot be
>> dropped (other drivers implemented too), so I would like to keep it there for
>> now.
>>
>> Wait a second what do you mean with "CPU access needs synchronization"?
>>
>> At least i915 makes the memory GPU inaccessible when you start to use it
>> with the CPU.
>>
>> If that is what this callback should do then this implementation would be
>> incorrect. Pinning doesn't wait for any GPU operation to finish.
>>
>> Regards,
>> Christian.
>>
>>> Sam
>>>
>>>
>>>> -----Original Message-----
>>>> From: Koenig, Christian
>>>> Sent: Wednesday, September 20, 2017 2:58 AM
>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>> support
>>>>> What do you mean "This isn't race free"?
>>>> Take a look at the code again:
>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>> amdgpu_gem_begin_cpu_access;
>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>> amdgpu_gem_end_cpu_access;
>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>> What happens when another thread is using amdgpu_dmabuf_ops to call
>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>>>
>>>> I would just completely drop the two callbacks, pinning is not
>>>> necessary for CPU access and thinking more about it it actually has
>>>> some unwanted side effects.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>>>>>> Maybe better use "vma->vm_pgoff +=
>> amdgpu_bo_mmap_offset(bo) >>
>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>> How other drivers handle this?
>>>>> This is a good catch. Looks like pgoff is honored during prime mmap,
>>>>> not a
>>>> fake offset here.
>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>> This isn't race free and needs to be fixed.
>>>>>> Better add callbacks to drm_prime.c similar to
>>>> drm_gem_dmabuf_mmap().
>>>>> What do you mean "This isn't race free"?
>>>>>
>>>>> Regards,
>>>>> Sam
>>>>>
>>>>>
>>>>>
>>>>> On 2017-09-15 11:05 AM, Christian König wrote:
>>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>>>>>>> v2: drop hdp invalidate/flush.
>>>>>>>
>>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>>>>>>> ---
>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
>>>> ++++++++++++++++++++++++++++++-
>>>>>>>      3 files changed, 81 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> index d2aaad7..188b705 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>> @@ -395,11 +395,14 @@
>> amdgpu_gem_prime_import_sg_table(struct
>>>> drm_device *dev,
>>>>>>>      struct dma_buf *amdgpu_gem_prime_export(struct drm_device
>> *dev,
>>>>>>>                          struct drm_gem_object *gobj,
>>>>>>>                          int flags);
>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>>>> drm_device *dev,
>>>>>>> +                        struct dma_buf *dma_buf);
>>>>>>>      int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>>>>>      void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>>>>>      struct reservation_object *amdgpu_gem_prime_res_obj(struct
>>>> drm_gem_object *);
>>>>>>>      void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>>>>>>>      void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj,
>> void
>>>>>>> *vaddr);
>>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
>>>>>>> +vm_area_struct *vma);
>>>>>>>      int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>>>>>>        /* sub-allocation manager, it has to be protected by another lock.
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>> index 2cdf844..9b63ac5 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>>>>>>          .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>>>          .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>>>          .gem_prime_export = amdgpu_gem_prime_export,
>>>>>>> -    .gem_prime_import = drm_gem_prime_import,
>>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>>>>>>          .gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>>>          .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>>>          .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@ -843,6
>>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
>>>>>>>          .gem_prime_import_sg_table =
>>>> amdgpu_gem_prime_import_sg_table,
>>>>>>>          .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>>>>>>          .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>>>>>>            .name = DRIVER_NAME,
>>>>>>>          .desc = DRIVER_DESC,
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>> index 5b3f928..13c977a 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
>>>> drm_gem_object *obj, void *vaddr)
>>>>>>>          ttm_bo_kunmap(&bo->dma_buf_vmap);
>>>>>>>      }
>>>>>>>      +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
>> struct
>>>>>>> vm_area_struct *vma)
>>>>>>> +{
>>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    if (!vma->vm_file)
>>>>>>> +        return -ENODEV;
>>>>>>> +
>>>>>>> +    if (adev == NULL)
>>>>>>> +        return -ENODEV;
>>>>>>> +
>>>>>>> +    /* Check for valid size. */
>>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>>>>>>> +        return -EPERM;
>>>>>>> +    }
>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>>>>>> Maybe better use "vma->vm_pgoff +=
>> amdgpu_bo_mmap_offset(bo) >>
>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>> How other drivers handle this?
>>>>>>
>>>>>>> +
>>>>>>> +    /* prime mmap does not need to check access, so allow here */
>>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
>>>>> private_data);
>>>>>>> +    if (ret)
>>>>>>> +        return ret;
>>>>>>> +
>>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
>>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
>>>>>>> + vma->vm_file->private_data);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>      struct drm_gem_object *
>>>>>>>      amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>>>>>>                       struct dma_buf_attachment *attach, @@ -130,14
>>>>>>> +164,55 @@ struct reservation_object
>>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>>>          return bo->tbo.resv;
>>>>>>>      }
>>>>>>>      +static int amdgpu_gem_begin_cpu_access(struct dma_buf
>>>>>>> *dma_buf, enum dma_data_direction direction)
>>>>>>> +{
>>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
>>>> enum
>>>>>>> +dma_data_direction direction) {
>>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>>>>>>> +
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>>>> +
>>>>>>>      struct dma_buf *amdgpu_gem_prime_export(struct drm_device
>> *dev,
>>>>>>>                          struct drm_gem_object *gobj,
>>>>>>>                          int flags)
>>>>>>>      {
>>>>>>>          struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>>> +    struct dma_buf *dma_buf;
>>>>>>>            if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>>>>              return ERR_PTR(-EPERM);
>>>>>>>      -    return drm_gem_prime_export(dev, gobj, flags);
>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>> amdgpu_gem_begin_cpu_access;
>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>> amdgpu_gem_end_cpu_access;
>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>> This isn't race free and needs to be fixed.
>>>>>>
>>>>>> Better add callbacks to drm_prime.c similar to
>>>> drm_gem_dmabuf_mmap().
>>>>>> Alternative you could just completely drop
>>>> amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access()
>> as
>>>> well.
>>>>>> When the buffer is shared between device it is pinned anyway and
>>>>>> when
>>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> +
>>>>>>> +    return dma_buf;
>>>>>>> +}
>>>>>>> +
>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>>>> drm_device *dev,
>>>>>>> +                        struct dma_buf *dma_buf) {
>>>>>>> +    struct drm_gem_object *obj;
>>>>>>> +
>>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>>>> +        obj = dma_buf->priv;
>>>>>>> +        if (obj->dev == dev) {
>>>>>>> +            /*
>>>>>>> +             * Importing dmabuf exported from our own gem increases
>>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
>>>>>>> +             */
>>>>>>> +            drm_gem_object_get(obj);
>>>>>>> +            return obj;
>>>>>>> +        }
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
>>>>>>>      }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                             ` <70db5b39-546e-ac15-635e-dad73420e8bc-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-20 17:47                               ` Li, Samuel
       [not found]                                 ` <BLUPR12MB0628DCFA21305B174B919D9EF5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Li, Samuel @ 2017-09-20 17:47 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> No that isn't correct. Pinning just notes that the BO shouldn't be moved any
> more.  It doesn't wait for anything.

It does. The following is from amdgpu_gem_prime_pin(),
91          * Wait for all shared fences to complete before we switch to future
 92          * use of exclusive fence on this prime shared bo.
 93          */
 94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
 95                                                   MAX_SCHEDULE_TIMEOUT);
 96         if (unlikely(ret < 0)) {
 97                 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
 98                 amdgpu_bo_unreserve(bo);
 99                 return ret;
100         }

Besides, the pinning process prepares all the necessary state before and after moving the buffer (ttm_bo_validate, amdgpu_ttm_bind); I think if a buffer can be moved, it is probably also in a good condition to be accessed.

Sam

> -----Original Message-----
> From: Koenig, Christian
> Sent: Wednesday, September 20, 2017 1:38 PM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> 
> Am 20.09.2017 um 19:34 schrieb Li, Samuel:
> >> If that is what this callback should do then this implementation
> >> would be incorrect. Pinning doesn't wait for any GPU operation to finish.
> > During pinning, it will wait for all the fences to finish. That shall be OK.
> 
> No that isn't correct. Pinning just notes that the BO shouldn't be moved any
> more.
> 
> It doesn't wait for anything.
> 
> Christian.
> 
> >
> > Sam
> >
> >
> >
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Wednesday, September 20, 2017 12:21 PM
> >> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> support
> >>
> >> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
> >>>> What happens when another thread is using amdgpu_dmabuf_ops to
> call
> >>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
> >>> Right, that is an issue.
> >> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be able
> to
> >> deal with that.
> >>
> >>>> I would just completely drop the two callbacks, pinning is not
> >>>> necessary for CPU access and thinking more about it it actually has
> >>>> some unwanted side effects.
> >>> CPU access needs synchronization anyway, so the two callbacks cannot
> >>> be
> >> dropped (other drivers implemented too), so I would like to keep it
> >> there for now.
> >>
> >> Wait a second what do you mean with "CPU access needs
> synchronization"?
> >>
> >> At least i915 makes the memory GPU inaccessible when you start to use
> >> it with the CPU.
> >>
> >> If that is what this callback should do then this implementation
> >> would be incorrect. Pinning doesn't wait for any GPU operation to finish.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Sam
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Koenig, Christian
> >>>> Sent: Wednesday, September 20, 2017 2:58 AM
> >>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> >> support
> >>>>> What do you mean "This isn't race free"?
> >>>> Take a look at the code again:
> >>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >>>> amdgpu_gem_begin_cpu_access;
> >>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >>>> amdgpu_gem_end_cpu_access;
> >>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>> What happens when another thread is using amdgpu_dmabuf_ops to
> call
> >>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
> >>>>
> >>>> I would just completely drop the two callbacks, pinning is not
> >>>> necessary for CPU access and thinking more about it it actually has
> >>>> some unwanted side effects.
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
> >>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
> PAGE_SHIFT;
> >>>>>> Maybe better use "vma->vm_pgoff +=
> >> amdgpu_bo_mmap_offset(bo) >>
> >>>> PAGE_SHIFT;", but I'm not sure.
> >>>>>> How other drivers handle this?
> >>>>> This is a good catch. Looks like pgoff is honored during prime
> >>>>> mmap, not a
> >>>> fake offset here.
> >>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>> This isn't race free and needs to be fixed.
> >>>>>> Better add callbacks to drm_prime.c similar to
> >>>> drm_gem_dmabuf_mmap().
> >>>>> What do you mean "This isn't race free"?
> >>>>>
> >>>>> Regards,
> >>>>> Sam
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 2017-09-15 11:05 AM, Christian König wrote:
> >>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> >>>>>>> v2: drop hdp invalidate/flush.
> >>>>>>>
> >>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> >>>>>>> ---
> >>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> >>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> >>>>>>>      drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
> >>>> ++++++++++++++++++++++++++++++-
> >>>>>>>      3 files changed, 81 insertions(+), 2 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>> index d2aaad7..188b705 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>> @@ -395,11 +395,14 @@
> >> amdgpu_gem_prime_import_sg_table(struct
> >>>> drm_device *dev,
> >>>>>>>      struct dma_buf *amdgpu_gem_prime_export(struct
> drm_device
> >> *dev,
> >>>>>>>                          struct drm_gem_object *gobj,
> >>>>>>>                          int flags);
> >>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> >>>> drm_device *dev,
> >>>>>>> +                        struct dma_buf *dma_buf);
> >>>>>>>      int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >>>>>>>      void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> >>>>>>>      struct reservation_object *amdgpu_gem_prime_res_obj(struct
> >>>> drm_gem_object *);
> >>>>>>>      void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
> >>>>>>>      void amdgpu_gem_prime_vunmap(struct drm_gem_object
> *obj,
> >> void
> >>>>>>> *vaddr);
> >>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
> struct
> >>>>>>> +vm_area_struct *vma);
> >>>>>>>      int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
> >>>>>>>        /* sub-allocation manager, it has to be protected by another
> lock.
> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>> index 2cdf844..9b63ac5 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> >>>>>>>          .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>>>>>          .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>>>>>          .gem_prime_export = amdgpu_gem_prime_export,
> >>>>>>> -    .gem_prime_import = drm_gem_prime_import,
> >>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
> >>>>>>>          .gem_prime_pin = amdgpu_gem_prime_pin,
> >>>>>>>          .gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>>>>>          .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@
> >>>>>>> -843,6
> >>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
> >>>>>>>          .gem_prime_import_sg_table =
> >>>> amdgpu_gem_prime_import_sg_table,
> >>>>>>>          .gem_prime_vmap = amdgpu_gem_prime_vmap,
> >>>>>>>          .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> >>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
> >>>>>>>            .name = DRIVER_NAME,
> >>>>>>>          .desc = DRIVER_DESC,
> >>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>> index 5b3f928..13c977a 100644
> >>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
> >>>> drm_gem_object *obj, void *vaddr)
> >>>>>>>          ttm_bo_kunmap(&bo->dma_buf_vmap);
> >>>>>>>      }
> >>>>>>>      +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
> >> struct
> >>>>>>> vm_area_struct *vma)
> >>>>>>> +{
> >>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> >>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo-
> >tbo.bdev);
> >>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
> >>>>>>> +    int ret;
> >>>>>>> +
> >>>>>>> +    if (!vma->vm_file)
> >>>>>>> +        return -ENODEV;
> >>>>>>> +
> >>>>>>> +    if (adev == NULL)
> >>>>>>> +        return -ENODEV;
> >>>>>>> +
> >>>>>>> +    /* Check for valid size. */
> >>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
> >>>>>>> +        return -EINVAL;
> >>>>>>> +
> >>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> >>>>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> >>>>>>> +        return -EPERM;
> >>>>>>> +    }
> >>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
> PAGE_SHIFT;
> >>>>>> Maybe better use "vma->vm_pgoff +=
> >> amdgpu_bo_mmap_offset(bo) >>
> >>>> PAGE_SHIFT;", but I'm not sure.
> >>>>>> How other drivers handle this?
> >>>>>>
> >>>>>>> +
> >>>>>>> +    /* prime mmap does not need to check access, so allow here */
> >>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
> >>>>> private_data);
> >>>>>>> +    if (ret)
> >>>>>>> +        return ret;
> >>>>>>> +
> >>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
> >>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
> >>>>>>> + vma->vm_file->private_data);
> >>>>>>> +
> >>>>>>> +    return ret;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>      struct drm_gem_object *
> >>>>>>>      amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> >>>>>>>                       struct dma_buf_attachment *attach, @@
> >>>>>>> -130,14
> >>>>>>> +164,55 @@ struct reservation_object
> >>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>>>>>          return bo->tbo.resv;
> >>>>>>>      }
> >>>>>>>      +static int amdgpu_gem_begin_cpu_access(struct dma_buf
> >>>>>>> *dma_buf, enum dma_data_direction direction)
> >>>>>>> +{
> >>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf
> *dma_buf,
> >>>> enum
> >>>>>>> +dma_data_direction direction) {
> >>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
> >>>>>>> +
> >>>>>>> +    return 0;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> >>>>>>> +
> >>>>>>>      struct dma_buf *amdgpu_gem_prime_export(struct
> drm_device
> >> *dev,
> >>>>>>>                          struct drm_gem_object *gobj,
> >>>>>>>                          int flags)
> >>>>>>>      {
> >>>>>>>          struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> >>>>>>> +    struct dma_buf *dma_buf;
> >>>>>>>            if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> >>>>>>>              return ERR_PTR(-EPERM);
> >>>>>>>      -    return drm_gem_prime_export(dev, gobj, flags);
> >>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >>>> amdgpu_gem_begin_cpu_access;
> >>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >>>> amdgpu_gem_end_cpu_access;
> >>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>> This isn't race free and needs to be fixed.
> >>>>>>
> >>>>>> Better add callbacks to drm_prime.c similar to
> >>>> drm_gem_dmabuf_mmap().
> >>>>>> Alternative you could just completely drop
> >>>> amdgpu_gem_begin_cpu_access() and
> amdgpu_gem_end_cpu_access()
> >> as
> >>>> well.
> >>>>>> When the buffer is shared between device it is pinned anyway and
> >>>>>> when
> >>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as
> well.
> >>>>>> Regards,
> >>>>>> Christian.
> >>>>>>
> >>>>>>> +
> >>>>>>> +    return dma_buf;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> >>>> drm_device *dev,
> >>>>>>> +                        struct dma_buf *dma_buf) {
> >>>>>>> +    struct drm_gem_object *obj;
> >>>>>>> +
> >>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>>>>>> +        obj = dma_buf->priv;
> >>>>>>> +        if (obj->dev == dev) {
> >>>>>>> +            /*
> >>>>>>> +             * Importing dmabuf exported from our own gem increases
> >>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
> >>>>>>> +             */
> >>>>>>> +            drm_gem_object_get(obj);
> >>>>>>> +            return obj;
> >>>>>>> +        }
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
> >>>>>>>      }
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                                 ` <BLUPR12MB0628DCFA21305B174B919D9EF5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-09-20 18:15                                   ` Christian König
       [not found]                                     ` <085926a6-90fc-2cf7-f05e-e762fa401fe8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-09-20 18:15 UTC (permalink / raw)
  To: Li, Samuel, Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 20.09.2017 um 19:47 schrieb Li, Samuel:
>> No that isn't correct. Pinning just notes that the BO shouldn't be moved any
>> more.  It doesn't wait for anything.
> It does. The following is from amdgpu_gem_prime_pin(),
> 91          * Wait for all shared fences to complete before we switch to future
>   92          * use of exclusive fence on this prime shared bo.
>   93          */
>   94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
>   95                                                   MAX_SCHEDULE_TIMEOUT);
>   96         if (unlikely(ret < 0)) {
>   97                 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
>   98                 amdgpu_bo_unreserve(bo);
>   99                 return ret;
> 100         }
>
> Besides, the pinning process prepares all the necessary state before and after moving the buffer (ttm_bo_validate, amdgpu_ttm_bind),

No, you misunderstood what this is good for.

The waiting done here is only for the shared fence to switch from 
explicitly to implicitly synchronization for correct interaction with 
the Intel driver.

As soon the the BO is exported that shouldn't wait for anything any more.

> I think if a buffer can be moved, it is probably also in a good condition to be accessed.

That is complete nonsense.

ttm_bo_validate() is an asynchronous operation to enable GPU access to 
the BO, it isn't related at all to possible CPU access and can actually 
prevent it a number of cases.

amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't 
related to CPU access either.

What you most likely need to do here is to use 
reservation_object_wait_timeout_rcu() to wait for all GPU operations to end.

Regards,
Christian.

>
> Sam
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Wednesday, September 20, 2017 1:38 PM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
>>
>> Am 20.09.2017 um 19:34 schrieb Li, Samuel:
>>>> If that is what this callback should do then this implementation
>>>> would be incorrect. Pinning doesn't wait for any GPU operation to finish.
>>> During pinning, it will wait for all the fences to finish. That shall be OK.
>> No that isn't correct. Pinning just notes that the BO shouldn't be moved any
>> more.
>>
>> It doesn't wait for anything.
>>
>> Christian.
>>
>>> Sam
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: Koenig, Christian
>>>> Sent: Wednesday, September 20, 2017 12:21 PM
>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>> support
>>>> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
>>>>>> What happens when another thread is using amdgpu_dmabuf_ops to
>> call
>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>>>> Right, that is an issue.
>>>> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be able
>> to
>>>> deal with that.
>>>>
>>>>>> I would just completely drop the two callbacks, pinning is not
>>>>>> necessary for CPU access and thinking more about it it actually has
>>>>>> some unwanted side effects.
>>>>> CPU access needs synchronization anyway, so the two callbacks cannot
>>>>> be
>>>> dropped (other drivers implemented too), so I would like to keep it
>>>> there for now.
>>>>
>>>> Wait a second what do you mean with "CPU access needs
>> synchronization"?
>>>> At least i915 makes the memory GPU inaccessible when you start to use
>>>> it with the CPU.
>>>>
>>>> If that is what this callback should do then this implementation
>>>> would be incorrect. Pinning doesn't wait for any GPU operation to finish.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> Sam
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Koenig, Christian
>>>>>> Sent: Wednesday, September 20, 2017 2:58 AM
>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>>>> support
>>>>>>> What do you mean "This isn't race free"?
>>>>>> Take a look at the code again:
>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>>>> amdgpu_gem_begin_cpu_access;
>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>>>> amdgpu_gem_end_cpu_access;
>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>> What happens when another thread is using amdgpu_dmabuf_ops to
>> call
>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>>>>>
>>>>>> I would just completely drop the two callbacks, pinning is not
>>>>>> necessary for CPU access and thinking more about it it actually has
>>>>>> some unwanted side effects.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
>> PAGE_SHIFT;
>>>>>>>> Maybe better use "vma->vm_pgoff +=
>>>> amdgpu_bo_mmap_offset(bo) >>
>>>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>>>> How other drivers handle this?
>>>>>>> This is a good catch. Looks like pgoff is honored during prime
>>>>>>> mmap, not a
>>>>>> fake offset here.
>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>> This isn't race free and needs to be fixed.
>>>>>>>> Better add callbacks to drm_prime.c similar to
>>>>>> drm_gem_dmabuf_mmap().
>>>>>>> What do you mean "This isn't race free"?
>>>>>>>
>>>>>>> Regards,
>>>>>>> Sam
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 2017-09-15 11:05 AM, Christian König wrote:
>>>>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>>>>>>>>> v2: drop hdp invalidate/flush.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>>>>>>>>> ---
>>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
>>>>>> ++++++++++++++++++++++++++++++-
>>>>>>>>>       3 files changed, 81 insertions(+), 2 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>> index d2aaad7..188b705 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>> @@ -395,11 +395,14 @@
>>>> amdgpu_gem_prime_import_sg_table(struct
>>>>>> drm_device *dev,
>>>>>>>>>       struct dma_buf *amdgpu_gem_prime_export(struct
>> drm_device
>>>> *dev,
>>>>>>>>>                           struct drm_gem_object *gobj,
>>>>>>>>>                           int flags);
>>>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>>>>>> drm_device *dev,
>>>>>>>>> +                        struct dma_buf *dma_buf);
>>>>>>>>>       int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>>>>>>>       void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>>>>>>>>       struct reservation_object *amdgpu_gem_prime_res_obj(struct
>>>>>> drm_gem_object *);
>>>>>>>>>       void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>>>>>>>>>       void amdgpu_gem_prime_vunmap(struct drm_gem_object
>> *obj,
>>>> void
>>>>>>>>> *vaddr);
>>>>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
>> struct
>>>>>>>>> +vm_area_struct *vma);
>>>>>>>>>       int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>>>>>>>>         /* sub-allocation manager, it has to be protected by another
>> lock.
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>> index 2cdf844..9b63ac5 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>>>>>>>>           .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>>>>>           .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>>>>>           .gem_prime_export = amdgpu_gem_prime_export,
>>>>>>>>> -    .gem_prime_import = drm_gem_prime_import,
>>>>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>>>>>>>>           .gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>>>>>           .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>>>>>           .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@
>>>>>>>>> -843,6
>>>>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
>>>>>>>>>           .gem_prime_import_sg_table =
>>>>>> amdgpu_gem_prime_import_sg_table,
>>>>>>>>>           .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>>>>>>>>           .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>>>>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>>>>>>>>             .name = DRIVER_NAME,
>>>>>>>>>           .desc = DRIVER_DESC,
>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>> index 5b3f928..13c977a 100644
>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
>>>>>> drm_gem_object *obj, void *vaddr)
>>>>>>>>>           ttm_bo_kunmap(&bo->dma_buf_vmap);
>>>>>>>>>       }
>>>>>>>>>       +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
>>>> struct
>>>>>>>>> vm_area_struct *vma)
>>>>>>>>> +{
>>>>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>>>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo-
>>> tbo.bdev);
>>>>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
>>>>>>>>> +    int ret;
>>>>>>>>> +
>>>>>>>>> +    if (!vma->vm_file)
>>>>>>>>> +        return -ENODEV;
>>>>>>>>> +
>>>>>>>>> +    if (adev == NULL)
>>>>>>>>> +        return -ENODEV;
>>>>>>>>> +
>>>>>>>>> +    /* Check for valid size. */
>>>>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>>>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>>>>>>>>> +        return -EPERM;
>>>>>>>>> +    }
>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
>> PAGE_SHIFT;
>>>>>>>> Maybe better use "vma->vm_pgoff +=
>>>> amdgpu_bo_mmap_offset(bo) >>
>>>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>>>> How other drivers handle this?
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +    /* prime mmap does not need to check access, so allow here */
>>>>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
>>>>>>> private_data);
>>>>>>>>> +    if (ret)
>>>>>>>>> +        return ret;
>>>>>>>>> +
>>>>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
>>>>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
>>>>>>>>> + vma->vm_file->private_data);
>>>>>>>>> +
>>>>>>>>> +    return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>       struct drm_gem_object *
>>>>>>>>>       amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>>>>>>>>                        struct dma_buf_attachment *attach, @@
>>>>>>>>> -130,14
>>>>>>>>> +164,55 @@ struct reservation_object
>>>>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>>>>>           return bo->tbo.resv;
>>>>>>>>>       }
>>>>>>>>>       +static int amdgpu_gem_begin_cpu_access(struct dma_buf
>>>>>>>>> *dma_buf, enum dma_data_direction direction)
>>>>>>>>> +{
>>>>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf
>> *dma_buf,
>>>>>> enum
>>>>>>>>> +dma_data_direction direction) {
>>>>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>>>>>>>>> +
>>>>>>>>> +    return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>>>>>> +
>>>>>>>>>       struct dma_buf *amdgpu_gem_prime_export(struct
>> drm_device
>>>> *dev,
>>>>>>>>>                           struct drm_gem_object *gobj,
>>>>>>>>>                           int flags)
>>>>>>>>>       {
>>>>>>>>>           struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>>>>> +    struct dma_buf *dma_buf;
>>>>>>>>>             if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>>>>>>               return ERR_PTR(-EPERM);
>>>>>>>>>       -    return drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>>>> amdgpu_gem_begin_cpu_access;
>>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>>>> amdgpu_gem_end_cpu_access;
>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>> This isn't race free and needs to be fixed.
>>>>>>>>
>>>>>>>> Better add callbacks to drm_prime.c similar to
>>>>>> drm_gem_dmabuf_mmap().
>>>>>>>> Alternative you could just completely drop
>>>>>> amdgpu_gem_begin_cpu_access() and
>> amdgpu_gem_end_cpu_access()
>>>> as
>>>>>> well.
>>>>>>>> When the buffer is shared between device it is pinned anyway and
>>>>>>>> when
>>>>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as
>> well.
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +    return dma_buf;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>>>>>> drm_device *dev,
>>>>>>>>> +                        struct dma_buf *dma_buf) {
>>>>>>>>> +    struct drm_gem_object *obj;
>>>>>>>>> +
>>>>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>>>>>> +        obj = dma_buf->priv;
>>>>>>>>> +        if (obj->dev == dev) {
>>>>>>>>> +            /*
>>>>>>>>> +             * Importing dmabuf exported from our own gem increases
>>>>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
>>>>>>>>> +             */
>>>>>>>>> +            drm_gem_object_get(obj);
>>>>>>>>> +            return obj;
>>>>>>>>> +        }
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
>>>>>>>>>       }
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                                     ` <085926a6-90fc-2cf7-f05e-e762fa401fe8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-09-20 20:31                                       ` Li, Samuel
       [not found]                                         ` <BLUPR12MB0628E9219A8C44D7220AA5F8F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Li, Samuel @ 2017-09-20 20:31 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> The waiting done here is only for the shared fence to switch from explicitly to
> implicitly synchronization for correct interaction with the Intel driver.
Actually here it is going to wait for all fences,
94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true...

> ttm_bo_validate() is an asynchronous operation to enable GPU access to the
> BO, it isn't related at all to possible CPU access and can actually prevent it a
> number of cases.
> > amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't related
> to CPU access either.
ttm_bo_validate() can move the buffer if necessary, and amdgpu_ttm_bind() will flush hdp, which we have discussed before.

> What you most likely need to do here is to use
> reservation_object_wait_timeout_rcu() to wait for all GPU operations to end.
Right, that is called by amdgpu_gem_prime_pin().

Sam

> -----Original Message-----
> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
> Sent: Wednesday, September 20, 2017 2:15 PM
> To: Li, Samuel <Samuel.Li@amd.com>; Koenig, Christian
> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> 
> Am 20.09.2017 um 19:47 schrieb Li, Samuel:
> >> No that isn't correct. Pinning just notes that the BO shouldn't be
> >> moved any more.  It doesn't wait for anything.
> > It does. The following is from amdgpu_gem_prime_pin(),
> > 91          * Wait for all shared fences to complete before we switch to future
> >   92          * use of exclusive fence on this prime shared bo.
> >   93          */
> >   94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true,
> false,
> >   95                                                   MAX_SCHEDULE_TIMEOUT);
> >   96         if (unlikely(ret < 0)) {
> >   97                 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
> >   98                 amdgpu_bo_unreserve(bo);
> >   99                 return ret;
> > 100         }
> >
> > Besides, pinning process prepares all the stuff before and after
> > moving buffer(ttm_bo_validate, amdgpu_ttm_bind),
> 
> No, you misunderstood what this is good for.
> 
> The waiting done here is only for the shared fence to switch from explicitly to
> implicitly synchronization for correct interaction with the Intel driver.
> 
> As soon the the BO is exported that shouldn't wait for anything any more.
> 
> > I think if a buffer can be moved, it is probably also in a good condition to be
> accessed.
> 
> That is complete nonsense.
> 
> ttm_bo_validate() is an asynchronous operation to enable GPU access to the
> BO, it isn't related at all to possible CPU access and can actually prevent it a
> number of cases.
> 
> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't related
> to CPU access either.
> 
> What you most likely need to do here is to use
> reservation_object_wait_timeout_rcu() to wait for all GPU operations to end.
> 
> Regards,
> Christian.
> 
> >
> > Sam
> >
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Wednesday, September 20, 2017 1:38 PM
> >> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> support
> >>
> >> Am 20.09.2017 um 19:34 schrieb Li, Samuel:
> >>>> If that is what this callback should do then this implementation
> >>>> would be incorrect. Pinning doesn't wait for any GPU operation to
> finish.
> >>> During pinning, it will wait for all the fences to finish. That shall be OK.
> >> No that isn't correct. Pinning just notes that the BO shouldn't be
> >> moved any more.
> >>
> >> It doesn't wait for anything.
> >>
> >> Christian.
> >>
> >>> Sam
> >>>
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Koenig, Christian
> >>>> Sent: Wednesday, September 20, 2017 12:21 PM
> >>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> >> support
> >>>> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
> >>>>>> What happens when another thread is using amdgpu_dmabuf_ops
> to
> >> call
> >>>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
> >>>>> Right, that is an issue.
> >>>> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be
> able
> >> to
> >>>> deal with that.
> >>>>
> >>>>>> I would just completely drop the two callbacks, pinning is not
> >>>>>> necessary for CPU access and thinking more about it it actually
> >>>>>> has some unwanted side effects.
> >>>>> CPU access needs synchronization anyway, so the two callbacks
> >>>>> cannot be
> >>>> dropped (other drivers implemented too), so I would like to keep it
> >>>> there for now.
> >>>>
> >>>> Wait a second what do you mean with "CPU access needs
> >> synchronization"?
> >>>> At least i915 makes the memory GPU inaccessible when you start to
> >>>> use it with the CPU.
> >>>>
> >>>> If that is what this callback should do then this implementation
> >>>> would be incorrect. Pinning doesn't wait for any GPU operation to
> finish.
> >>>>
> >>>> Regards,
> >>>> Christian.
> >>>>
> >>>>> Sam
> >>>>>
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Koenig, Christian
> >>>>>> Sent: Wednesday, September 20, 2017 2:58 AM
> >>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-
> gfx@lists.freedesktop.org
> >>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> >>>> support
> >>>>>>> What do you mean "This isn't race free"?
> >>>>>> Take a look at the code again:
> >>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >>>>>> amdgpu_gem_begin_cpu_access;
> >>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >>>>>> amdgpu_gem_end_cpu_access;
> >>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>> What happens when another thread is using amdgpu_dmabuf_ops
> to
> >> call
> >>>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
> >>>>>>
> >>>>>> I would just completely drop the two callbacks, pinning is not
> >>>>>> necessary for CPU access and thinking more about it it actually
> >>>>>> has some unwanted side effects.
> >>>>>>
> >>>>>> Regards,
> >>>>>> Christian.
> >>>>>>
> >>>>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
> >>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
> >> PAGE_SHIFT;
> >>>>>>>> Maybe better use "vma->vm_pgoff +=
> >>>> amdgpu_bo_mmap_offset(bo) >>
> >>>>>> PAGE_SHIFT;", but I'm not sure.
> >>>>>>>> How other drivers handle this?
> >>>>>>> This is a good catch. Looks like pgoff is honored during prime
> >>>>>>> mmap, not a
> >>>>>> fake offset here.
> >>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>>>> This isn't race free and needs to be fixed.
> >>>>>>>> Better add callbacks to drm_prime.c similar to
> >>>>>> drm_gem_dmabuf_mmap().
> >>>>>>> What do you mean "This isn't race free"?
> >>>>>>>
> >>>>>>> Regards,
> >>>>>>> Sam
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>> On 2017-09-15 11:05 AM, Christian König wrote:
> >>>>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> >>>>>>>>> v2: drop hdp invalidate/flush.
> >>>>>>>>>
> >>>>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> >>>>>>>>> ---
> >>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> >>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> >>>>>>>>>       drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
> >>>>>> ++++++++++++++++++++++++++++++-
> >>>>>>>>>       3 files changed, 81 insertions(+), 2 deletions(-)
> >>>>>>>>>
> >>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>> index d2aaad7..188b705 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>> @@ -395,11 +395,14 @@
> >>>> amdgpu_gem_prime_import_sg_table(struct
> >>>>>> drm_device *dev,
> >>>>>>>>>       struct dma_buf *amdgpu_gem_prime_export(struct
> >> drm_device
> >>>> *dev,
> >>>>>>>>>                           struct drm_gem_object *gobj,
> >>>>>>>>>                           int flags);
> >>>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> >>>>>> drm_device *dev,
> >>>>>>>>> +                        struct dma_buf *dma_buf);
> >>>>>>>>>       int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> >>>>>>>>>       void amdgpu_gem_prime_unpin(struct drm_gem_object
> *obj);
> >>>>>>>>>       struct reservation_object
> >>>>>>>>> *amdgpu_gem_prime_res_obj(struct
> >>>>>> drm_gem_object *);
> >>>>>>>>>       void *amdgpu_gem_prime_vmap(struct drm_gem_object
> *obj);
> >>>>>>>>>       void amdgpu_gem_prime_vunmap(struct drm_gem_object
> >> *obj,
> >>>> void
> >>>>>>>>> *vaddr);
> >>>>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
> >> struct
> >>>>>>>>> +vm_area_struct *vma);
> >>>>>>>>>       int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
> >>>>>>>>>         /* sub-allocation manager, it has to be protected by
> >>>>>>>>> another
> >> lock.
> >>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>> index 2cdf844..9b63ac5 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> >>>>>>>>>           .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>>>>>>>           .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>>>>>>>           .gem_prime_export = amdgpu_gem_prime_export,
> >>>>>>>>> -    .gem_prime_import = drm_gem_prime_import,
> >>>>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
> >>>>>>>>>           .gem_prime_pin = amdgpu_gem_prime_pin,
> >>>>>>>>>           .gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>>>>>>>           .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@
> >>>>>>>>> -843,6
> >>>>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
> >>>>>>>>>           .gem_prime_import_sg_table =
> >>>>>> amdgpu_gem_prime_import_sg_table,
> >>>>>>>>>           .gem_prime_vmap = amdgpu_gem_prime_vmap,
> >>>>>>>>>           .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> >>>>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
> >>>>>>>>>             .name = DRIVER_NAME,
> >>>>>>>>>           .desc = DRIVER_DESC, diff --git
> >>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>> index 5b3f928..13c977a 100644
> >>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
> >>>>>> drm_gem_object *obj, void *vaddr)
> >>>>>>>>>           ttm_bo_kunmap(&bo->dma_buf_vmap);
> >>>>>>>>>       }
> >>>>>>>>>       +int amdgpu_gem_prime_mmap(struct drm_gem_object
> *obj,
> >>>> struct
> >>>>>>>>> vm_area_struct *vma)
> >>>>>>>>> +{
> >>>>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> >>>>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo-
> >>> tbo.bdev);
> >>>>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
> >>>>>>>>> +    int ret;
> >>>>>>>>> +
> >>>>>>>>> +    if (!vma->vm_file)
> >>>>>>>>> +        return -ENODEV;
> >>>>>>>>> +
> >>>>>>>>> +    if (adev == NULL)
> >>>>>>>>> +        return -ENODEV;
> >>>>>>>>> +
> >>>>>>>>> +    /* Check for valid size. */
> >>>>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
> >>>>>>>>> +        return -EINVAL;
> >>>>>>>>> +
> >>>>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> >>>>>>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> >>>>>>>>> +        return -EPERM;
> >>>>>>>>> +    }
> >>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
> >> PAGE_SHIFT;
> >>>>>>>> Maybe better use "vma->vm_pgoff +=
> >>>> amdgpu_bo_mmap_offset(bo) >>
> >>>>>> PAGE_SHIFT;", but I'm not sure.
> >>>>>>>> How other drivers handle this?
> >>>>>>>>
> >>>>>>>>> +
> >>>>>>>>> +    /* prime mmap does not need to check access, so allow here
> */
> >>>>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma-
> >vm_file-
> >>>>>>> private_data);
> >>>>>>>>> +    if (ret)
> >>>>>>>>> +        return ret;
> >>>>>>>>> +
> >>>>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev-
> >mman.bdev);
> >>>>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
> >>>>>>>>> + vma->vm_file->private_data);
> >>>>>>>>> +
> >>>>>>>>> +    return ret;
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>>       struct drm_gem_object *
> >>>>>>>>>       amdgpu_gem_prime_import_sg_table(struct drm_device
> *dev,
> >>>>>>>>>                        struct dma_buf_attachment *attach, @@
> >>>>>>>>> -130,14
> >>>>>>>>> +164,55 @@ struct reservation_object
> >>>>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>>>>>>>           return bo->tbo.resv;
> >>>>>>>>>       }
> >>>>>>>>>       +static int amdgpu_gem_begin_cpu_access(struct dma_buf
> >>>>>>>>> *dma_buf, enum dma_data_direction direction)
> >>>>>>>>> +{
> >>>>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf
> >> *dma_buf,
> >>>>>> enum
> >>>>>>>>> +dma_data_direction direction) {
> >>>>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
> >>>>>>>>> +
> >>>>>>>>> +    return 0;
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> >>>>>>>>> +
> >>>>>>>>>       struct dma_buf *amdgpu_gem_prime_export(struct
> >> drm_device
> >>>> *dev,
> >>>>>>>>>                           struct drm_gem_object *gobj,
> >>>>>>>>>                           int flags)
> >>>>>>>>>       {
> >>>>>>>>>           struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> >>>>>>>>> +    struct dma_buf *dma_buf;
> >>>>>>>>>             if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> >>>>>>>>>               return ERR_PTR(-EPERM);
> >>>>>>>>>       -    return drm_gem_prime_export(dev, gobj, flags);
> >>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >>>>>> amdgpu_gem_begin_cpu_access;
> >>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >>>>>> amdgpu_gem_end_cpu_access;
> >>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>>>> This isn't race free and needs to be fixed.
> >>>>>>>>
> >>>>>>>> Better add callbacks to drm_prime.c similar to
> >>>>>> drm_gem_dmabuf_mmap().
> >>>>>>>> Alternative you could just completely drop
> >>>>>> amdgpu_gem_begin_cpu_access() and
> >> amdgpu_gem_end_cpu_access()
> >>>> as
> >>>>>> well.
> >>>>>>>> When the buffer is shared between device it is pinned anyway
> >>>>>>>> and when
> >>>>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as
> >> well.
> >>>>>>>> Regards,
> >>>>>>>> Christian.
> >>>>>>>>
> >>>>>>>>> +
> >>>>>>>>> +    return dma_buf;
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> >>>>>> drm_device *dev,
> >>>>>>>>> +                        struct dma_buf *dma_buf) {
> >>>>>>>>> +    struct drm_gem_object *obj;
> >>>>>>>>> +
> >>>>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>>>>>>>> +        obj = dma_buf->priv;
> >>>>>>>>> +        if (obj->dev == dev) {
> >>>>>>>>> +            /*
> >>>>>>>>> +             * Importing dmabuf exported from our own gem
> increases
> >>>>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
> >>>>>>>>> +             */
> >>>>>>>>> +            drm_gem_object_get(obj);
> >>>>>>>>> +            return obj;
> >>>>>>>>> +        }
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
> >>>>>>>>>       }
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                 ` <BLUPR12MB06283E58EC4B6434D71DCF50F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2017-09-20 16:20                   ` Christian König
@ 2017-09-21  6:25                   ` Deucher, Alexander
  1 sibling, 0 replies; 15+ messages in thread
From: Deucher, Alexander @ 2017-09-21  6:25 UTC (permalink / raw)
  To: Li, Samuel, Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Li, Samuel
> Sent: Wednesday, September 20, 2017 11:44 AM
> To: Koenig, Christian; amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> 
> > What happens when another thread is using amdgpu_dmabuf_ops to call
> > begin_cpu_access/end_cpu_access when you are fixing it up again?
> Right, that is an issue.
> 
> > I would just completely drop the two callbacks, pinning is not necessary for
> > CPU access and thinking more about it it actually has some unwanted side
> > effects.
> CPU access needs synchronization anyway, so the two callbacks cannot be
> dropped (other drivers implemented too), so I would like to keep it there for
> now.

Pinning only needs to happen for device access (i.e., so the pages don't get swapped out).

Alex

> 
> Sam
> 
> 
> > -----Original Message-----
> > From: Koenig, Christian
> > Sent: Wednesday, September 20, 2017 2:58 AM
> > To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> > Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> >
> > > What do you mean "This isn't race free"?
> >
> > Take a look at the code again:
> > > +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> > > +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> > > +    amdgpu_dmabuf_ops.begin_cpu_access =
> > amdgpu_gem_begin_cpu_access;
> > > +    amdgpu_dmabuf_ops.end_cpu_access =
> > amdgpu_gem_end_cpu_access;
> > > +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >
> > What happens when another thread is using amdgpu_dmabuf_ops to call
> > begin_cpu_access/end_cpu_access when you are fixing it up again?
> >
> > I would just completely drop the two callbacks, pinning is not necessary for
> > CPU access and thinking more about it it actually has some unwanted side
> > effects.
> >
> > Regards,
> > Christian.
> >
> > Am 19.09.2017 um 23:22 schrieb Samuel Li:
> > >>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> > >> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo)
> >>
> > PAGE_SHIFT;", but I'm not sure.
> > >> How other drivers handle this?
> > > This is a good catch. Looks like pgoff is honored during prime mmap, not a
> > fake offset here.
> > >
> > >>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> > >> This isn't race free and needs to be fixed.
> > >> Better add callbacks to drm_prime.c similar to
> > drm_gem_dmabuf_mmap().
> > > What do you mean "This isn't race free"?
> > >
> > > Regards,
> > > Sam
> > >
> > >
> > >
> > > On 2017-09-15 11:05 AM, Christian König wrote:
> > >> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> > >>> v2: drop hdp invalidate/flush.
> > >>>
> > >>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> > >>> ---
> > >>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> > >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> > >>>    drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
> > ++++++++++++++++++++++++++++++-
> > >>>    3 files changed, 81 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > >>> index d2aaad7..188b705 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > >>> @@ -395,11 +395,14 @@
> amdgpu_gem_prime_import_sg_table(struct
> > drm_device *dev,
> > >>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device
> *dev,
> > >>>                        struct drm_gem_object *gobj,
> > >>>                        int flags);
> > >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> > drm_device *dev,
> > >>> +                        struct dma_buf *dma_buf);
> > >>>    int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
> > >>>    void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
> > >>>    struct reservation_object *amdgpu_gem_prime_res_obj(struct
> > drm_gem_object *);
> > >>>    void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
> > >>>    void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj,
> void
> > >>> *vaddr);
> > >>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
> > >>> +vm_area_struct *vma);
> > >>>    int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
> > >>>      /* sub-allocation manager, it has to be protected by another lock.
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > >>> index 2cdf844..9b63ac5 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > >>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> > >>>        .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> > >>>        .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> > >>>        .gem_prime_export = amdgpu_gem_prime_export,
> > >>> -    .gem_prime_import = drm_gem_prime_import,
> > >>> +    .gem_prime_import = amdgpu_gem_prime_import,
> > >>>        .gem_prime_pin = amdgpu_gem_prime_pin,
> > >>>        .gem_prime_unpin = amdgpu_gem_prime_unpin,
> > >>>        .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@ -843,6
> > >>> +843,7 @@ static struct drm_driver kms_driver = {
> > >>>        .gem_prime_import_sg_table =
> > amdgpu_gem_prime_import_sg_table,
> > >>>        .gem_prime_vmap = amdgpu_gem_prime_vmap,
> > >>>        .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> > >>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
> > >>>          .name = DRIVER_NAME,
> > >>>        .desc = DRIVER_DESC,
> > >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > >>> index 5b3f928..13c977a 100644
> > >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> > >>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
> > drm_gem_object *obj, void *vaddr)
> > >>>        ttm_bo_kunmap(&bo->dma_buf_vmap);
> > >>>    }
> > >>>    +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
> struct
> > >>> vm_area_struct *vma)
> > >>> +{
> > >>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> > >>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > >>> +    unsigned asize = amdgpu_bo_size(bo);
> > >>> +    int ret;
> > >>> +
> > >>> +    if (!vma->vm_file)
> > >>> +        return -ENODEV;
> > >>> +
> > >>> +    if (adev == NULL)
> > >>> +        return -ENODEV;
> > >>> +
> > >>> +    /* Check for valid size. */
> > >>> +    if (asize < vma->vm_end - vma->vm_start)
> > >>> +        return -EINVAL;
> > >>> +
> > >>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> > >>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> > >>> +        return -EPERM;
> > >>> +    }
> > >>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
> > >> Maybe better use "vma->vm_pgoff += amdgpu_bo_mmap_offset(bo)
> >>
> > PAGE_SHIFT;", but I'm not sure.
> > >>
> > >> How other drivers handle this?
> > >>
> > >>> +
> > >>> +    /* prime mmap does not need to check access, so allow here */
> > >>> +    ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file-
> > >private_data);
> > >>> +    if (ret)
> > >>> +        return ret;
> > >>> +
> > >>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
> > >>> +    drm_vma_node_revoke(&obj->vma_node,
> > >>> + vma->vm_file->private_data);
> > >>> +
> > >>> +    return ret;
> > >>> +}
> > >>> +
> > >>>    struct drm_gem_object *
> > >>>    amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
> > >>>                     struct dma_buf_attachment *attach, @@ -130,14
> > >>> +164,55 @@ struct reservation_object
> > *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> > >>>        return bo->tbo.resv;
> > >>>    }
> > >>>    +static int amdgpu_gem_begin_cpu_access(struct dma_buf
> *dma_buf,
> > >>> enum dma_data_direction direction)
> > >>> +{
> > >>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
> > >>> +}
> > >>> +
> > >>> +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
> > enum
> > >>> +dma_data_direction direction) {
> > >>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
> > >>> +
> > >>> +    return 0;
> > >>> +}
> > >>> +
> > >>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> > >>> +
> > >>>    struct dma_buf *amdgpu_gem_prime_export(struct drm_device
> *dev,
> > >>>                        struct drm_gem_object *gobj,
> > >>>                        int flags)
> > >>>    {
> > >>>        struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> > >>> +    struct dma_buf *dma_buf;
> > >>>          if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> > >>>            return ERR_PTR(-EPERM);
> > >>>    -    return drm_gem_prime_export(dev, gobj, flags);
> > >>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> > >>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> > >>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> > amdgpu_gem_begin_cpu_access;
> > >>> +    amdgpu_dmabuf_ops.end_cpu_access =
> > amdgpu_gem_end_cpu_access;
> > >>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> > >> This isn't race free and needs to be fixed.
> > >>
> > >> Better add callbacks to drm_prime.c similar to
> > drm_gem_dmabuf_mmap().
> > >>
> > >> Alternative you could just completely drop
> > amdgpu_gem_begin_cpu_access() and amdgpu_gem_end_cpu_access()
> as
> > well.
> > >>
> > >> When the buffer is shared between device it is pinned anyway and
> when
> > it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as well.
> > >>
> > >> Regards,
> > >> Christian.
> > >>
> > >>> +
> > >>> +    return dma_buf;
> > >>> +}
> > >>> +
> > >>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
> > drm_device *dev,
> > >>> +                        struct dma_buf *dma_buf) {
> > >>> +    struct drm_gem_object *obj;
> > >>> +
> > >>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> > >>> +        obj = dma_buf->priv;
> > >>> +        if (obj->dev == dev) {
> > >>> +            /*
> > >>> +             * Importing dmabuf exported from our own gem increases
> > >>> +             * refcount on gem itself instead of f_count of dmabuf.
> > >>> +             */
> > >>> +            drm_gem_object_get(obj);
> > >>> +            return obj;
> > >>> +        }
> > >>> +    }
> > >>> +
> > >>> +    return drm_gem_prime_import(dev, dma_buf);
> > >>>    }
> > >>
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                                         ` <BLUPR12MB0628E9219A8C44D7220AA5F8F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-09-21  7:39                                           ` Christian König
       [not found]                                             ` <ca08675a-7bb0-5e44-bb1f-dab3a0f14802-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2017-09-21  7:39 UTC (permalink / raw)
  To: Li, Samuel, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 20.09.2017 um 22:31 schrieb Li, Samuel:
>> The waiting done here is only for the shared fence to switch from explicitly to
>> implicitly synchronization for correct interaction with the Intel driver.
> Actually here it is going to wait for all fences,
> 94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true...

That is actually a bug, we only need to wait here when 
prime_shared_count == 0.

>> ttm_bo_validate() is an asynchronous operation to enable GPU access to the
>> BO, it isn't related at all to possible CPU access and can actually prevent it a
>> number of cases.
>>> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't related
>> to CPU access either.
> ttm_bo_validate() can move the buffer if necessary,

No, that is a very common misconception.

ttm_bo_validate() schedules the buffer to be moved at some point in the 
future.

The only thing which saves us here is the fact that TTM waits for the 
move to be completed in its page fault handler.

> and amdgpu_ttm_bind() will flush hdp, which we have discussed before.

And as concluded before that is unnecessary.

>
>> What you most likely need to do here is to use
>> reservation_object_wait_timeout_rcu() to wait for all GPU operations to end.
> Right, that is called by amdgpu_gem_prime_pin().

Please just completely drop the begin_cpu_access() and end_cpu_access() 
callbacks.

Checking other drivers the only one actually implementing something 
special is the i915 driver which needs to remove the BO from the GTT 
domain for cache coherency I think.

The omap driver implements those callbacks to grab an extra reference to 
the pages while they are accessed, we don't need to do this since TTM 
grabs an extra reference during the mmap anyway.

All other drivers implementing mmap (exynos, rockchip, tilcdc, arcpgu, 
msm, meson, etnaviv, sti, qxl, vc4, rcar-du, atmel-hlcdc, imx, mediatek, 
mali, vgem, fsl-dcu, zte, hisilicon, sun4i, mxsfb) don't have a 
begin/end callback.

Pinning the BO can actually cause problems with the display code when the 
BO needs to be scanned out, so we should avoid that as much as possible.

Regards,
Christian.

>
> Sam
>
>> -----Original Message-----
>> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
>> Sent: Wednesday, September 20, 2017 2:15 PM
>> To: Li, Samuel <Samuel.Li@amd.com>; Koenig, Christian
>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
>>
>> Am 20.09.2017 um 19:47 schrieb Li, Samuel:
>>>> No that isn't correct. Pinning just notes that the BO shouldn't be
>>>> moved any more.  It doesn't wait for anything.
>>> It does. The following is from amdgpu_gem_prime_pin(),
>>> 91          * Wait for all shared fences to complete before we switch to future
>>>    92          * use of exclusive fence on this prime shared bo.
>>>    93          */
>>>    94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true,
>> false,
>>>    95                                                   MAX_SCHEDULE_TIMEOUT);
>>>    96         if (unlikely(ret < 0)) {
>>>    97                 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
>>>    98                 amdgpu_bo_unreserve(bo);
>>>    99                 return ret;
>>> 100         }
>>>
>>> Besides, pinning process prepares all the stuff before and after
>>> moving buffer(ttm_bo_validate, amdgpu_ttm_bind),
>> No, you misunderstood what this is good for.
>>
>> The waiting done here is only for the shared fence to switch from explicitly to
>> implicitly synchronization for correct interaction with the Intel driver.
>>
> >> As soon as the BO is exported that shouldn't wait for anything any more.
>>
>>> I think if a buffer can be moved, it is probably also in a good condition to be
>> accessed.
>>
>> That is complete nonsense.
>>
>> ttm_bo_validate() is an asynchronous operation to enable GPU access to the
>> BO, it isn't related at all to possible CPU access and can actually prevent it a
>> number of cases.
>>
>> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't related
>> to CPU access either.
>>
>> What you most likely need to do here is to use
>> reservation_object_wait_timeout_rcu() to wait for all GPU operations to end.
>>
>> Regards,
>> Christian.
>>
>>> Sam
>>>
>>>> -----Original Message-----
>>>> From: Koenig, Christian
>>>> Sent: Wednesday, September 20, 2017 1:38 PM
>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>> support
>>>> Am 20.09.2017 um 19:34 schrieb Li, Samuel:
>>>>>> If that is what this callback should do then this implementation
>>>>>> would be incorrect. Pinning doesn't wait for any GPU operation to
>> finish.
> >>>>> During pinning, it will wait for all the fences to finish. That shall be OK.
>>>> No that isn't correct. Pinning just notes that the BO shouldn't be
>>>> moved any more.
>>>>
>>>> It doesn't wait for anything.
>>>>
>>>> Christian.
>>>>
>>>>> Sam
>>>>>
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Koenig, Christian
>>>>>> Sent: Wednesday, September 20, 2017 12:21 PM
>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>>>> support
>>>>>> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
>>>>>>>> What happens when another thread is using amdgpu_dmabuf_ops
>> to
>>>> call
>>>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>>>>>> Right, that is an issue.
>>>>>> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be
>> able
>>>> to
>>>>>> deal with that.
>>>>>>
>>>>>>>> I would just completely drop the two callbacks, pinning is not
>>>>>>>> necessary for CPU access and thinking more about it it actually
>>>>>>>> has some unwanted side effects.
>>>>>>> CPU access needs synchronization anyway, so the two callbacks
>>>>>>> cannot be
>>>>>> dropped (other drivers implemented too), so I would like to keep it
>>>>>> there for now.
>>>>>>
>>>>>> Wait a second what do you mean with "CPU access needs
>>>> synchronization"?
>>>>>> At least i915 makes the memory GPU inaccessible when you start to
>>>>>> use it with the CPU.
>>>>>>
>>>>>> If that is what this callback should do then this implementation
>>>>>> would be incorrect. Pinning doesn't wait for any GPU operation to
>> finish.
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
>>>>>>> Sam
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: Koenig, Christian
>>>>>>>> Sent: Wednesday, September 20, 2017 2:58 AM
>>>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-
>> gfx@lists.freedesktop.org
>>>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>>>>>> support
>>>>>>>>> What do you mean "This isn't race free"?
>>>>>>>> Take a look at the code again:
>>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>>>>>> amdgpu_gem_begin_cpu_access;
>>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>>>>>> amdgpu_gem_end_cpu_access;
>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>> What happens when another thread is using amdgpu_dmabuf_ops
>> to
>>>> call
>>>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up again?
>>>>>>>>
>>>>>>>> I would just completely drop the two callbacks, pinning is not
>>>>>>>> necessary for CPU access and thinking more about it it actually
>>>>>>>> has some unwanted side effects.
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
>>>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
>>>> PAGE_SHIFT;
>>>>>>>>>> Maybe better use "vma->vm_pgoff +=
>>>>>> amdgpu_bo_mmap_offset(bo) >>
>>>>>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>>>>>> How other drivers handle this?
>>>>>>>>> This is a good catch. Looks like pgoff is honored during prime
>>>>>>>>> mmap, not a
>>>>>>>> fake offset here.
>>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>>>> This isn't race free and needs to be fixed.
>>>>>>>>>> Better add callbacks to drm_prime.c similar to
>>>>>>>> drm_gem_dmabuf_mmap().
>>>>>>>>> What do you mean "This isn't race free"?
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Sam
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 2017-09-15 11:05 AM, Christian König wrote:
>>>>>>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>>>>>>>>>>> v2: drop hdp invalidate/flush.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>>>>>>>>>>> ---
>>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
>>>>>>>> ++++++++++++++++++++++++++++++-
>>>>>>>>>>>        3 files changed, 81 insertions(+), 2 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>> index d2aaad7..188b705 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>> @@ -395,11 +395,14 @@
>>>>>> amdgpu_gem_prime_import_sg_table(struct
>>>>>>>> drm_device *dev,
>>>>>>>>>>>        struct dma_buf *amdgpu_gem_prime_export(struct
>>>> drm_device
>>>>>> *dev,
>>>>>>>>>>>                            struct drm_gem_object *gobj,
>>>>>>>>>>>                            int flags);
>>>>>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>>>>>>>> drm_device *dev,
>>>>>>>>>>> +                        struct dma_buf *dma_buf);
>>>>>>>>>>>        int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>>>>>>>>>>        void amdgpu_gem_prime_unpin(struct drm_gem_object
>> *obj);
>>>>>>>>>>>        struct reservation_object
>>>>>>>>>>> *amdgpu_gem_prime_res_obj(struct
>>>>>>>> drm_gem_object *);
>>>>>>>>>>>        void *amdgpu_gem_prime_vmap(struct drm_gem_object
>> *obj);
>>>>>>>>>>>        void amdgpu_gem_prime_vunmap(struct drm_gem_object
>>>> *obj,
>>>>>> void
>>>>>>>>>>> *vaddr);
>>>>>>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
>>>> struct
>>>>>>>>>>> +vm_area_struct *vma);
>>>>>>>>>>>        int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>>>>>>>>>>          /* sub-allocation manager, it has to be protected by
>>>>>>>>>>> another
>>>> lock.
>>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>> index 2cdf844..9b63ac5 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>>>>>>>>>>            .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>>>>>>>            .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>>>>>>>            .gem_prime_export = amdgpu_gem_prime_export,
>>>>>>>>>>> -    .gem_prime_import = drm_gem_prime_import,
>>>>>>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>>>>>>>>>>            .gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>>>>>>>            .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>>>>>>>            .gem_prime_res_obj = amdgpu_gem_prime_res_obj, @@
>>>>>>>>>>> -843,6
>>>>>>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
>>>>>>>>>>>            .gem_prime_import_sg_table =
>>>>>>>> amdgpu_gem_prime_import_sg_table,
>>>>>>>>>>>            .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>>>>>>>>>>            .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>>>>>>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>>>>>>>>>>              .name = DRIVER_NAME,
>>>>>>>>>>>            .desc = DRIVER_DESC, diff --git
>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>> index 5b3f928..13c977a 100644
>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>> @@ -57,6 +57,40 @@ void amdgpu_gem_prime_vunmap(struct
>>>>>>>> drm_gem_object *obj, void *vaddr)
>>>>>>>>>>>            ttm_bo_kunmap(&bo->dma_buf_vmap);
>>>>>>>>>>>        }
>>>>>>>>>>>        +int amdgpu_gem_prime_mmap(struct drm_gem_object
>> *obj,
>>>>>> struct
>>>>>>>>>>> vm_area_struct *vma)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>>>>>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo-
>>>>> tbo.bdev);
>>>>>>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
>>>>>>>>>>> +    int ret;
>>>>>>>>>>> +
>>>>>>>>>>> +    if (!vma->vm_file)
>>>>>>>>>>> +        return -ENODEV;
>>>>>>>>>>> +
>>>>>>>>>>> +    if (adev == NULL)
>>>>>>>>>>> +        return -ENODEV;
>>>>>>>>>>> +
>>>>>>>>>>> +    /* Check for valid size. */
>>>>>>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
>>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>>> +
>>>>>>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>>>>>>>> +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>>>>>>>>>>> +        return -EPERM;
>>>>>>>>>>> +    }
>>>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
>>>> PAGE_SHIFT;
>>>>>>>>>> Maybe better use "vma->vm_pgoff +=
>>>>>> amdgpu_bo_mmap_offset(bo) >>
>>>>>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>>>>>> How other drivers handle this?
>>>>>>>>>>
>>>>>>>>>>> +
>>>>>>>>>>> +    /* prime mmap does not need to check access, so allow here
>> */
>>>>>>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma-
>>> vm_file-
>>>>>>>>> private_data);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev-
>>> mman.bdev);
>>>>>>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
>>>>>>>>>>> + vma->vm_file->private_data);
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>        struct drm_gem_object *
>>>>>>>>>>>        amdgpu_gem_prime_import_sg_table(struct drm_device
>> *dev,
>>>>>>>>>>>                         struct dma_buf_attachment *attach, @@
>>>>>>>>>>> -130,14
>>>>>>>>>>> +164,55 @@ struct reservation_object
>>>>>>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>>>>>>>            return bo->tbo.resv;
>>>>>>>>>>>        }
>>>>>>>>>>>        +static int amdgpu_gem_begin_cpu_access(struct dma_buf
>>>>>>>>>>> *dma_buf, enum dma_data_direction direction)
>>>>>>>>>>> +{
>>>>>>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf
>>>> *dma_buf,
>>>>>>>> enum
>>>>>>>>>>> +dma_data_direction direction) {
>>>>>>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>>>>>>>>>>> +
>>>>>>>>>>> +    return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>>>>>>>> +
>>>>>>>>>>>        struct dma_buf *amdgpu_gem_prime_export(struct
>>>> drm_device
>>>>>> *dev,
>>>>>>>>>>>                            struct drm_gem_object *gobj,
>>>>>>>>>>>                            int flags)
>>>>>>>>>>>        {
>>>>>>>>>>>            struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>>>>>>> +    struct dma_buf *dma_buf;
>>>>>>>>>>>              if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>>>>>>>>                return ERR_PTR(-EPERM);
>>>>>>>>>>>        -    return drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>>>>>> amdgpu_gem_begin_cpu_access;
>>>>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>>>>>> amdgpu_gem_end_cpu_access;
>>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>>>> This isn't race free and needs to be fixed.
>>>>>>>>>>
>>>>>>>>>> Better add callbacks to drm_prime.c similar to
>>>>>>>> drm_gem_dmabuf_mmap().
>>>>>>>>>> Alternative you could just completely drop
>>>>>>>> amdgpu_gem_begin_cpu_access() and
>>>> amdgpu_gem_end_cpu_access()
>>>>>> as
>>>>>>>> well.
>>>>>>>>>> When the buffer is shared between device it is pinned anyway
>>>>>>>>>> and when
>>>>>>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings as
>>>> well.
>>>>>>>>>> Regards,
>>>>>>>>>> Christian.
>>>>>>>>>>
>>>>>>>>>>> +
>>>>>>>>>>> +    return dma_buf;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +struct drm_gem_object *amdgpu_gem_prime_import(struct
>>>>>>>> drm_device *dev,
>>>>>>>>>>> +                        struct dma_buf *dma_buf) {
>>>>>>>>>>> +    struct drm_gem_object *obj;
>>>>>>>>>>> +
>>>>>>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>>>>>>>> +        obj = dma_buf->priv;
>>>>>>>>>>> +        if (obj->dev == dev) {
>>>>>>>>>>> +            /*
> >>>>>>>>>>> +             * Importing dmabuf exported from our own gem
>> increases
>>>>>>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
>>>>>>>>>>> +             */
>>>>>>>>>>> +            drm_gem_object_get(obj);
>>>>>>>>>>> +            return obj;
>>>>>>>>>>> +        }
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
>>>>>>>>>>>        }
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                                             ` <ca08675a-7bb0-5e44-bb1f-dab3a0f14802-5C7GfCeVMHo@public.gmane.org>
@ 2017-09-21 14:31                                               ` Li, Samuel
       [not found]                                                 ` <BLUPR12MB0628E666BD4B7BD7483B9F63F5660-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Li, Samuel @ 2017-09-21 14:31 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

> Please just completely drop the begin_cpu_access() and end_cpu_access()
> callbacks.
> Checking other drivers the only one actually implementing something special
> is the i915 driver which needs to remove the BO from the GTT domain for
> cache coherency I think.

Well, as the first step, we can drop the begin/end() callbacks for now, and revisit later.

Sam

> -----Original Message-----
> From: Koenig, Christian
> Sent: Thursday, September 21, 2017 3:39 AM
> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
> 
> Am 20.09.2017 um 22:31 schrieb Li, Samuel:
> >> The waiting done here is only for the shared fence to switch from
> >> explicitly to implicitly synchronization for correct interaction with the Intel
> driver.
> > Actually here it is going to wait for all fences,
> > 94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true...
> 
> That is actually a bug, we only need to wait here when prime_shared_count
> == 0.
> 
> >> ttm_bo_validate() is an asynchronous operation to enable GPU access
> >> to the BO, it isn't related at all to possible CPU access and can
> >> actually prevent it a number of cases.
> >>> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't
> >>> related
> >> to CPU access either.
> > ttm_bo_validate() can move the buffer if necessary,
> 
> No, that is a very common misconception.
> 
> ttm_bo_validate() schedules the buffer to be moved at some point in the
> future.
> 
> The only thing which saves us here is the fact that TTM waits for the move to
> be completed in it's page fault handler.
> 
> > and amdgpu_ttm_bind() will flush hdp, which we have discussed before.
> 
> And as concluded before that is unnecessary.
> 
> >
> >> What you most likely need to do here is to use
> >> reservation_object_wait_timeout_rcu() to wait for all GPU operations to
> end.
> > Right, that is called by amdgpu_gem_prime_pin().
> 
> Please just completely drop the begin_cpu_access() and end_cpu_access()
> callbacks.
> 
> Checking other drivers the only one actually implementing something special
> is the i915 driver which needs to remove the BO from the GTT domain for
> cache coherency I think.
> 
> The omap driver implements those callbacks to grab an extra reference to
> the pages while they are accessed, we don't need to do this since TTM grabs an
> extra reference during the mmap anyway.
> 
> All other drivers implementing mmap (exynos, rockchip, tilcdc, arcpgu, msm,
> meson, etnaviv, sti, qxl, vc4, rcar-du, atmel-hlcdc, imx, mediatek, mali, vgem,
> fsl-dcu, zte, hisilicon, sun4i, mxsfb) don't have a begin/end callback.
> 
> Pinning the BO can actually cause problems with the display code when the
> BO needs to be scanned out, so we should avoid that as much as possible.
> 
> Regards,
> Christian.
> 
> >
> > Sam
> >
> >> -----Original Message-----
> >> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
> >> Sent: Wednesday, September 20, 2017 2:15 PM
> >> To: Li, Samuel <Samuel.Li@amd.com>; Koenig, Christian
> >> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> support
> >>
> >> Am 20.09.2017 um 19:47 schrieb Li, Samuel:
> >>>> No that isn't correct. Pinning just notes that the BO shouldn't be
> >>>> moved any more.  It doesn't wait for anything.
> >>> It does. The following is from amdgpu_gem_prime_pin(),
> >>> 91          * Wait for all shared fences to complete before we switch to
> future
> >>>    92          * use of exclusive fence on this prime shared bo.
> >>>    93          */
> >>>    94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true,
> >> false,
> >>>    95                                                   MAX_SCHEDULE_TIMEOUT);
> >>>    96         if (unlikely(ret < 0)) {
> >>>    97                 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
> >>>    98                 amdgpu_bo_unreserve(bo);
> >>>    99                 return ret;
> >>> 100         }
> >>>
> >>> Besides, pinning process prepares all the stuff before and after
> >>> moving buffer(ttm_bo_validate, amdgpu_ttm_bind),
> >> No, you misunderstood what this is good for.
> >>
> >> The waiting done here is only for the shared fence to switch from
> >> explicitly to implicitly synchronization for correct interaction with the Intel
> driver.
> >>
> >> As soon as the BO is exported that shouldn't wait for anything any more.
> >>
> >>> I think if a buffer can be moved, it is probably also in a good
> >>> condition to be
> >> accessed.
> >>
> >> That is complete nonsense.
> >>
> >> ttm_bo_validate() is an asynchronous operation to enable GPU access
> >> to the BO, it isn't related at all to possible CPU access and can
> >> actually prevent it a number of cases.
> >>
> >> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't
> >> related to CPU access either.
> >>
> >> What you most likely need to do here is to use
> >> reservation_object_wait_timeout_rcu() to wait for all GPU operations to
> end.
> >>
> >> Regards,
> >> Christian.
> >>
> >>> Sam
> >>>
> >>>> -----Original Message-----
> >>>> From: Koenig, Christian
> >>>> Sent: Wednesday, September 20, 2017 1:38 PM
> >>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
> >>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> >> support
> >>>> Am 20.09.2017 um 19:34 schrieb Li, Samuel:
> >>>>>> If that is what this callback should do then this implementation
> >>>>>> would be incorrect. Pinning doesn't wait for any GPU operation to
> >> finish.
> >>>>> During pinning, it will wait for all the fences to finish. That shall be OK.
> >>>> No that isn't correct. Pinning just notes that the BO shouldn't be
> >>>> moved any more.
> >>>>
> >>>> It doesn't wait for anything.
> >>>>
> >>>> Christian.
> >>>>
> >>>>> Sam
> >>>>>
> >>>>>
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Koenig, Christian
> >>>>>> Sent: Wednesday, September 20, 2017 12:21 PM
> >>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-
> gfx@lists.freedesktop.org
> >>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
> >>>> support
> >>>>>> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
> >>>>>>>> What happens when another thread is using
> amdgpu_dmabuf_ops
> >> to
> >>>> call
> >>>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up
> again?
> >>>>>>> Right, that is an issue.
> >>>>>> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be
> >> able
> >>>> to
> >>>>>> deal with that.
> >>>>>>
> >>>>>>>> I would just completely drop the two callbacks, pinning is not
> >>>>>>>> necessary for CPU access and thinking more about it it actually
> >>>>>>>> has some unwanted side effects.
> >>>>>>> CPU access needs synchronization anyway, so the two callbacks
> >>>>>>> cannot be
> >>>>>> dropped (other drivers implemented too), so I would like to keep
> >>>>>> it there for now.
> >>>>>>
> >>>>>> Wait a second what do you mean with "CPU access needs
> >>>> synchronization"?
> >>>>>> At least i915 makes the memory GPU inaccessible when you start to
> >>>>>> use it with the CPU.
> >>>>>>
> >>>>>> If that is what this callback should do then this implementation
> >>>>>> would be incorrect. Pinning doesn't wait for any GPU operation to
> >> finish.
> >>>>>> Regards,
> >>>>>> Christian.
> >>>>>>
> >>>>>>> Sam
> >>>>>>>
> >>>>>>>
> >>>>>>>> -----Original Message-----
> >>>>>>>> From: Koenig, Christian
> >>>>>>>> Sent: Wednesday, September 20, 2017 2:58 AM
> >>>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-
> >> gfx@lists.freedesktop.org
> >>>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add
> gem_prime_mmap
> >>>>>> support
> >>>>>>>>> What do you mean "This isn't race free"?
> >>>>>>>> Take a look at the code again:
> >>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >>>>>>>> amdgpu_gem_begin_cpu_access;
> >>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >>>>>>>> amdgpu_gem_end_cpu_access;
> >>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>>>> What happens when another thread is using
> amdgpu_dmabuf_ops
> >> to
> >>>> call
> >>>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up
> again?
> >>>>>>>>
> >>>>>>>> I would just completely drop the two callbacks, pinning is not
> >>>>>>>> necessary for CPU access and thinking more about it it actually
> >>>>>>>> has some unwanted side effects.
> >>>>>>>>
> >>>>>>>> Regards,
> >>>>>>>> Christian.
> >>>>>>>>
> >>>>>>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
> >>>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
> >>>> PAGE_SHIFT;
> >>>>>>>>>> Maybe better use "vma->vm_pgoff +=
> >>>>>> amdgpu_bo_mmap_offset(bo) >>
> >>>>>>>> PAGE_SHIFT;", but I'm not sure.
> >>>>>>>>>> How other drivers handle this?
> >>>>>>>>> This is a good catch. Looks like pgoff is honored during prime
> >>>>>>>>> mmap, not a
> >>>>>>>> fake offset here.
> >>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>>>>>> This isn't race free and needs to be fixed.
> >>>>>>>>>> Better add callbacks to drm_prime.c similar to
> >>>>>>>> drm_gem_dmabuf_mmap().
> >>>>>>>>> What do you mean "This isn't race free"?
> >>>>>>>>>
> >>>>>>>>> Regards,
> >>>>>>>>> Sam
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> On 2017-09-15 11:05 AM, Christian König wrote:
> >>>>>>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
> >>>>>>>>>>> v2: drop hdp invalidate/flush.
> >>>>>>>>>>>
> >>>>>>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
> >>>>>>>>>>> ---
> >>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
> >>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
> >>>>>>>>>>>        drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
> >>>>>>>> ++++++++++++++++++++++++++++++-
> >>>>>>>>>>>        3 files changed, 81 insertions(+), 2 deletions(-)
> >>>>>>>>>>>
> >>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>>>> index d2aaad7..188b705 100644
> >>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >>>>>>>>>>> @@ -395,11 +395,14 @@
> >>>>>> amdgpu_gem_prime_import_sg_table(struct
> >>>>>>>> drm_device *dev,
> >>>>>>>>>>>        struct dma_buf *amdgpu_gem_prime_export(struct
> >>>> drm_device
> >>>>>> *dev,
> >>>>>>>>>>>                            struct drm_gem_object *gobj,
> >>>>>>>>>>>                            int flags);
> >>>>>>>>>>> +struct drm_gem_object
> *amdgpu_gem_prime_import(struct
> >>>>>>>> drm_device *dev,
> >>>>>>>>>>> +                        struct dma_buf *dma_buf);
> >>>>>>>>>>>        int amdgpu_gem_prime_pin(struct drm_gem_object
> *obj);
> >>>>>>>>>>>        void amdgpu_gem_prime_unpin(struct drm_gem_object
> >> *obj);
> >>>>>>>>>>>        struct reservation_object
> >>>>>>>>>>> *amdgpu_gem_prime_res_obj(struct
> >>>>>>>> drm_gem_object *);
> >>>>>>>>>>>        void *amdgpu_gem_prime_vmap(struct drm_gem_object
> >> *obj);
> >>>>>>>>>>>        void amdgpu_gem_prime_vunmap(struct
> drm_gem_object
> >>>> *obj,
> >>>>>> void
> >>>>>>>>>>> *vaddr);
> >>>>>>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object
> *obj,
> >>>> struct
> >>>>>>>>>>> +vm_area_struct *vma);
> >>>>>>>>>>>        int amdgpu_gem_debugfs_init(struct amdgpu_device
> *adev);
> >>>>>>>>>>>          /* sub-allocation manager, it has to be protected
> >>>>>>>>>>> by another
> >>>> lock.
> >>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>>>> index 2cdf844..9b63ac5 100644
> >>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> >>>>>>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
> >>>>>>>>>>>            .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
> >>>>>>>>>>>            .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
> >>>>>>>>>>>            .gem_prime_export = amdgpu_gem_prime_export,
> >>>>>>>>>>> -    .gem_prime_import = drm_gem_prime_import,
> >>>>>>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
> >>>>>>>>>>>            .gem_prime_pin = amdgpu_gem_prime_pin,
> >>>>>>>>>>>            .gem_prime_unpin = amdgpu_gem_prime_unpin,
> >>>>>>>>>>>            .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
> @@
> >>>>>>>>>>> -843,6
> >>>>>>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
> >>>>>>>>>>>            .gem_prime_import_sg_table =
> >>>>>>>> amdgpu_gem_prime_import_sg_table,
> >>>>>>>>>>>            .gem_prime_vmap = amdgpu_gem_prime_vmap,
> >>>>>>>>>>>            .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
> >>>>>>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
> >>>>>>>>>>>              .name = DRIVER_NAME,
> >>>>>>>>>>>            .desc = DRIVER_DESC, diff --git
> >>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>>>> index 5b3f928..13c977a 100644
> >>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
> >>>>>>>>>>> @@ -57,6 +57,40 @@ void
> amdgpu_gem_prime_vunmap(struct
> >>>>>>>> drm_gem_object *obj, void *vaddr)
> >>>>>>>>>>>            ttm_bo_kunmap(&bo->dma_buf_vmap);
> >>>>>>>>>>>        }
> >>>>>>>>>>>        +int amdgpu_gem_prime_mmap(struct drm_gem_object
> >> *obj,
> >>>>>> struct
> >>>>>>>>>>> vm_area_struct *vma)
> >>>>>>>>>>> +{
> >>>>>>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> >>>>>>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo-
> >>>>> tbo.bdev);
> >>>>>>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
> >>>>>>>>>>> +    int ret;
> >>>>>>>>>>> +
> >>>>>>>>>>> +    if (!vma->vm_file)
> >>>>>>>>>>> +        return -ENODEV;
> >>>>>>>>>>> +
> >>>>>>>>>>> +    if (adev == NULL)
> >>>>>>>>>>> +        return -ENODEV;
> >>>>>>>>>>> +
> >>>>>>>>>>> +    /* Check for valid size. */
> >>>>>>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
> >>>>>>>>>>> +        return -EINVAL;
> >>>>>>>>>>> +
> >>>>>>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
> >>>>>>>>>>> +        (bo->flags &
> AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
> >>>>>>>>>>> +        return -EPERM;
> >>>>>>>>>>> +    }
> >>>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
> >>>> PAGE_SHIFT;
> >>>>>>>>>> Maybe better use "vma->vm_pgoff +=
> >>>>>> amdgpu_bo_mmap_offset(bo) >>
> >>>>>>>> PAGE_SHIFT;", but I'm not sure.
> >>>>>>>>>> How other drivers handle this?
> >>>>>>>>>>
> >>>>>>>>>>> +
> >>>>>>>>>>> +    /* prime mmap does not need to check access, so allow
> >>>>>>>>>>> + here
> >> */
> >>>>>>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma-
> >>> vm_file-
> >>>>>>>>> private_data);
> >>>>>>>>>>> +    if (ret)
> >>>>>>>>>>> +        return ret;
> >>>>>>>>>>> +
> >>>>>>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev-
> >>> mman.bdev);
> >>>>>>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
> >>>>>>>>>>> + vma->vm_file->private_data);
> >>>>>>>>>>> +
> >>>>>>>>>>> +    return ret;
> >>>>>>>>>>> +}
> >>>>>>>>>>> +
> >>>>>>>>>>>        struct drm_gem_object *
> >>>>>>>>>>>        amdgpu_gem_prime_import_sg_table(struct drm_device
> >> *dev,
> >>>>>>>>>>>                         struct dma_buf_attachment *attach,
> >>>>>>>>>>> @@
> >>>>>>>>>>> -130,14
> >>>>>>>>>>> +164,55 @@ struct reservation_object
> >>>>>>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
> >>>>>>>>>>>            return bo->tbo.resv;
> >>>>>>>>>>>        }
> >>>>>>>>>>>        +static int amdgpu_gem_begin_cpu_access(struct
> >>>>>>>>>>> dma_buf *dma_buf, enum dma_data_direction direction)
> >>>>>>>>>>> +{
> >>>>>>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
> >>>>>>>>>>> +}
> >>>>>>>>>>> +
> >>>>>>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf
> >>>> *dma_buf,
> >>>>>>>> enum
> >>>>>>>>>>> +dma_data_direction direction) {
> >>>>>>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
> >>>>>>>>>>> +
> >>>>>>>>>>> +    return 0;
> >>>>>>>>>>> +}
> >>>>>>>>>>> +
> >>>>>>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
> >>>>>>>>>>> +
> >>>>>>>>>>>        struct dma_buf *amdgpu_gem_prime_export(struct
> >>>> drm_device
> >>>>>> *dev,
> >>>>>>>>>>>                            struct drm_gem_object *gobj,
> >>>>>>>>>>>                            int flags)
> >>>>>>>>>>>        {
> >>>>>>>>>>>            struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
> >>>>>>>>>>> +    struct dma_buf *dma_buf;
> >>>>>>>>>>>              if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
> >>>>>>>>>>>                return ERR_PTR(-EPERM);
> >>>>>>>>>>>        -    return drm_gem_prime_export(dev, gobj, flags);
> >>>>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
> >>>>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
> >>>>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
> >>>>>>>> amdgpu_gem_begin_cpu_access;
> >>>>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
> >>>>>>>> amdgpu_gem_end_cpu_access;
> >>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
> >>>>>>>>>> This isn't race free and needs to be fixed.
> >>>>>>>>>>
> >>>>>>>>>> Better add callbacks to drm_prime.c similar to
> >>>>>>>> drm_gem_dmabuf_mmap().
> >>>>>>>>>> Alternative you could just completely drop
> >>>>>>>> amdgpu_gem_begin_cpu_access() and
> >>>> amdgpu_gem_end_cpu_access()
> >>>>>> as
> >>>>>>>> well.
> >>>>>>>>>> When the buffer is shared between device it is pinned anyway
> >>>>>>>>>> and when
> >>>>>>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings
> >>>>>>>> as
> >>>> well.
> >>>>>>>>>> Regards,
> >>>>>>>>>> Christian.
> >>>>>>>>>>
> >>>>>>>>>>> +
> >>>>>>>>>>> +    return dma_buf;
> >>>>>>>>>>> +}
> >>>>>>>>>>> +
> >>>>>>>>>>> +struct drm_gem_object
> *amdgpu_gem_prime_import(struct
> >>>>>>>> drm_device *dev,
> >>>>>>>>>>> +                        struct dma_buf *dma_buf) {
> >>>>>>>>>>> +    struct drm_gem_object *obj;
> >>>>>>>>>>> +
> >>>>>>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
> >>>>>>>>>>> +        obj = dma_buf->priv;
> >>>>>>>>>>> +        if (obj->dev == dev) {
> >>>>>>>>>>> +            /*
> >>>>>>>>>>> +             * Importing dmabuf exported from our own gem
> >> increases
> >>>>>>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
> >>>>>>>>>>> +             */
> >>>>>>>>>>> +            drm_gem_object_get(obj);
> >>>>>>>>>>> +            return obj;
> >>>>>>>>>>> +        }
> >>>>>>>>>>> +    }
> >>>>>>>>>>> +
> >>>>>>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
> >>>>>>>>>>>        }
> >>> _______________________________________________
> >>> amd-gfx mailing list
> >>> amd-gfx@lists.freedesktop.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
       [not found]                                                 ` <BLUPR12MB0628E666BD4B7BD7483B9F63F5660-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-09-21 14:34                                                   ` Christian König
  0 siblings, 0 replies; 15+ messages in thread
From: Christian König @ 2017-09-21 14:34 UTC (permalink / raw)
  To: Li, Samuel, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 21.09.2017 um 16:31 schrieb Li, Samuel:
>> Please just completely drop the begin_cpu_access() and end_cpu_access()
>> callbacks.
>> Checking other drivers the only one actually implementing something special
>> is the i915 driver which needs to remove the BO from the GTT domain for
>> cache coherency I think.
> Well, as the first step, we can drop the begin/end() callbacks for now, and revisit later.

Yeah, that's something I can perfectly live with. How about adding an in 
code comment and noting it in the commit message?

Christian.

>
> Sam
>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Thursday, September 21, 2017 3:39 AM
>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support
>>
>> Am 20.09.2017 um 22:31 schrieb Li, Samuel:
>>>> The waiting done here is only for the shared fence to switch from
>>>> explicitly to implicitly synchronization for correct interaction with the Intel
>> driver.
>>> Actually here it is going to wait for all fences,
>>> 94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true...
>> That is actually a bug, we only need to wait here when prime_shared_count
>> == 0.
>>
>>>> ttm_bo_validate() is an asynchronous operation to enable GPU access
>>>> to the BO, it isn't related at all to possible CPU access and can
>>>> actually prevent it a number of cases.
>>>>> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't
>>>>> related
>>>> to CPU access either.
>>> ttm_bo_validate() can move the buffer if necessary,
>> No, that is a very common misconception.
>>
>> ttm_bo_validate() schedules the buffer to be moved at some point in the
>> future.
>>
>> The only thing which saves us here is the fact that TTM waits for the move to
>> be completed in its page fault handler.
>>
>>> and amdgpu_ttm_bind() will flush hdp, which we have discussed before.
>> And as concluded before that is unnecessary.
>>
>>>> What you most likely need to do here is to use
>>>> reservation_object_wait_timeout_rcu() to wait for all GPU operations to
>> end.
>>> Right, that is called by amdgpu_gem_prime_pin().
>> Please just completely drop the begin_cpu_access() and end_cpu_access()
>> callbacks.
>>
>> Checking other drivers the only one actually implementing something special
>> is the i915 driver which needs to remove the BO from the GTT domain for
>> cache coherency I think.
>>
>> The omap driver implements those callbacks to grab an extra reference to
>> the pages while they are access, we don't need to do this since TTM grabs an
>> extra reference during the mmap anyway.
>>
>> All other drivers implementing mmap (exynos, rockchip, tilcdc, arcpgu, msm,
>> meson, etnaviv, sti, qxl, vc4, rcar-du, atmel-hlcdc, imx, mediatek, mali, vgem,
>> fsl-dcu, zte, hisilicon, sun4i, mxsfb) don't have a begin/end callback.
>>
>> Pinning the BO can actually cause problem with the display code when the
>> BO needs to be scanned out, so we should avoid that as much as possible.
>>
>> Regards,
>> Christian.
>>
>>> Sam
>>>
>>>> -----Original Message-----
>>>> From: Christian König [mailto:ckoenig.leichtzumerken@gmail.com]
>>>> Sent: Wednesday, September 20, 2017 2:15 PM
>>>> To: Li, Samuel <Samuel.Li@amd.com>; Koenig, Christian
>>>> <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>> support
>>>> Am 20.09.2017 um 19:47 schrieb Li, Samuel:
>>>>>> No that isn't correct. Pinning just notes that the BO shouldn't be
>>>>>> moved any more.  It doesn't wait for anything.
>>>>> It does. The following is from amdgpu_gem_prime_pin(),
>>>>> 91          * Wait for all shared fences to complete before we switch to
>> future
>>>>>     92          * use of exclusive fence on this prime shared bo.
>>>>>     93          */
>>>>>     94         ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true,
>>>> false,
>>>>>     95                                                   MAX_SCHEDULE_TIMEOUT);
>>>>>     96         if (unlikely(ret < 0)) {
>>>>>     97                 DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
>>>>>     98                 amdgpu_bo_unreserve(bo);
>>>>>     99                 return ret;
>>>>> 100         }
>>>>>
>>>>> Besides, pinning process prepares all the stuff before and after
>>>>> moving buffer(ttm_bo_validate, amdgpu_ttm_bind),
>>>> No, you misunderstood what this is good for.
>>>>
>>>> The waiting done here is only for the shared fence to switch from
>>>> explicitly to implicitly synchronization for correct interaction with the Intel
>> driver.
>>>> As soon as the BO is exported that shouldn't wait for anything any more.
>>>>
>>>>> I think if a buffer can be moved, it is probably also in a good
>>>>> condition to be
>>>> accessed.
>>>>
>>>> That is complete nonsense.
>>>>
>>>> ttm_bo_validate() is an asynchronous operation to enable GPU access
>>>> to the BO, it isn't related at all to possible CPU access and can
>>>> actually prevent it a number of cases.
>>>>
>>>> amdgpu_ttm_bind() in turn binds the BO to the GART space and isn't
>>>> related to CPU access either.
>>>>
>>>> What you most likely need to do here is to use
>>>> reservation_object_wait_timeout_rcu() to wait for all GPU operations to
>> end.
>>>> Regards,
>>>> Christian.
>>>>
>>>>> Sam
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Koenig, Christian
>>>>>> Sent: Wednesday, September 20, 2017 1:38 PM
>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>>>> support
>>>>>> Am 20.09.2017 um 19:34 schrieb Li, Samuel:
>>>>>>>> If that is what this callback should do then this implementation
>>>>>>>> would be incorrect. Pinning doesn't wait for any GPU operation to
>>>> finish.
>>>>>>> During pinning, it will wait for all the fences to finish. That shall be OK.
>>>>>> No that isn't correct. Pinning just notes that the BO shouldn't be
>>>>>> moved any more.
>>>>>>
>>>>>> It doesn't wait for anything.
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> Sam
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: Koenig, Christian
>>>>>>>> Sent: Wednesday, September 20, 2017 12:21 PM
>>>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-
>> gfx@lists.freedesktop.org
>>>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap
>>>>>> support
>>>>>>>> Am 20.09.2017 um 17:44 schrieb Li, Samuel:
>>>>>>>>>> What happens when another thread is using
>> amdgpu_dmabuf_ops
>>>> to
>>>>>> call
>>>>>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up
>> again?
>>>>>>>>> Right, that is an issue.
>>>>>>>> A simple "if (!amdgpu_dmabuf_ops.begin_cpu_access)" should be
>>>> able
>>>>>> to
>>>>>>>> deal with that.
>>>>>>>>
>>>>>>>>>> I would just completely drop the two callbacks, pinning is not
>>>>>>>>>> necessary for CPU access and thinking more about it it actually
>>>>>>>>>> has some unwanted side effects.
>>>>>>>>> CPU access needs synchronization anyway, so the two callbacks
>>>>>>>>> cannot be
>>>>>>>> dropped (other drivers implemented too), so I would like to keep
>>>>>>>> it there for now.
>>>>>>>>
>>>>>>>> Wait a second what do you mean with "CPU access needs
>>>>>> synchronization"?
>>>>>>>> At least i915 makes the memory GPU inaccessible when you start to
>>>>>>>> use it with the CPU.
>>>>>>>>
>>>>>>>> If that is what this callback should do then this implementation
>>>>>>>> would be incorrect. Pinning doesn't wait for any GPU operation to
>>>> finish.
>>>>>>>> Regards,
>>>>>>>> Christian.
>>>>>>>>
>>>>>>>>> Sam
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: Koenig, Christian
>>>>>>>>>> Sent: Wednesday, September 20, 2017 2:58 AM
>>>>>>>>>> To: Li, Samuel <Samuel.Li@amd.com>; amd-
>>>> gfx@lists.freedesktop.org
>>>>>>>>>> Subject: Re: [PATCH v2 1/1] drm/amdgpu: Add
>> gem_prime_mmap
>>>>>>>> support
>>>>>>>>>>> What do you mean "This isn't race free"?
>>>>>>>>>> Take a look at the code again:
>>>>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>>>>>>>> amdgpu_gem_begin_cpu_access;
>>>>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>>>>>>>> amdgpu_gem_end_cpu_access;
>>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>>>> What happens when another thread is using
>> amdgpu_dmabuf_ops
>>>> to
>>>>>> call
>>>>>>>>>> begin_cpu_access/end_cpu_access when you are fixing it up
>> again?
>>>>>>>>>> I would just completely drop the two callbacks, pinning is not
>>>>>>>>>> necessary for CPU access and thinking more about it it actually
>>>>>>>>>> has some unwanted side effects.
>>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>> Christian.
>>>>>>>>>>
>>>>>>>>>> Am 19.09.2017 um 23:22 schrieb Samuel Li:
>>>>>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
>>>>>> PAGE_SHIFT;
>>>>>>>>>>>> Maybe better use "vma->vm_pgoff +=
>>>>>>>> amdgpu_bo_mmap_offset(bo) >>
>>>>>>>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>>>>>>>> How other drivers handle this?
>>>>>>>>>>> This is a good catch. Looks like pgoff is honored during prime
>>>>>>>>>>> mmap, not a
>>>>>>>>>> fake offset here.
>>>>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>>>>>> This isn't race free and needs to be fixed.
>>>>>>>>>>>> Better add callbacks to drm_prime.c similar to
>>>>>>>>>> drm_gem_dmabuf_mmap().
>>>>>>>>>>> What do you mean "This isn't race free"?
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Sam
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On 2017-09-15 11:05 AM, Christian König wrote:
>>>>>>>>>>>> Am 14.09.2017 um 00:39 schrieb Samuel Li:
>>>>>>>>>>>>> v2: drop hdp invalidate/flush.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Signed-off-by: Samuel Li <Samuel.Li@amd.com>
>>>>>>>>>>>>> ---
>>>>>>>>>>>>>         drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>>>>>>>>>>>>>         drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
>>>>>>>>>>>>>         drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 77
>>>>>>>>>> ++++++++++++++++++++++++++++++-
>>>>>>>>>>>>>         3 files changed, 81 insertions(+), 2 deletions(-)
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>>>> index d2aaad7..188b705 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>>>>>>>>>> @@ -395,11 +395,14 @@
>>>>>>>> amdgpu_gem_prime_import_sg_table(struct
>>>>>>>>>> drm_device *dev,
>>>>>>>>>>>>>         struct dma_buf *amdgpu_gem_prime_export(struct
>>>>>> drm_device
>>>>>>>> *dev,
>>>>>>>>>>>>>                             struct drm_gem_object *gobj,
>>>>>>>>>>>>>                             int flags);
>>>>>>>>>>>>> +struct drm_gem_object
>> *amdgpu_gem_prime_import(struct
>>>>>>>>>> drm_device *dev,
>>>>>>>>>>>>> +                        struct dma_buf *dma_buf);
>>>>>>>>>>>>>         int amdgpu_gem_prime_pin(struct drm_gem_object
>> *obj);
>>>>>>>>>>>>>         void amdgpu_gem_prime_unpin(struct drm_gem_object
>>>> *obj);
>>>>>>>>>>>>>         struct reservation_object
>>>>>>>>>>>>> *amdgpu_gem_prime_res_obj(struct
>>>>>>>>>> drm_gem_object *);
>>>>>>>>>>>>>         void *amdgpu_gem_prime_vmap(struct drm_gem_object
>>>> *obj);
>>>>>>>>>>>>>         void amdgpu_gem_prime_vunmap(struct
>> drm_gem_object
>>>>>> *obj,
>>>>>>>> void
>>>>>>>>>>>>> *vaddr);
>>>>>>>>>>>>> +int amdgpu_gem_prime_mmap(struct drm_gem_object
>> *obj,
>>>>>> struct
>>>>>>>>>>>>> +vm_area_struct *vma);
>>>>>>>>>>>>>         int amdgpu_gem_debugfs_init(struct amdgpu_device
>> *adev);
>>>>>>>>>>>>>           /* sub-allocation manager, it has to be protected
>>>>>>>>>>>>> by another
>>>>>> lock.
>>>>>>>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>>>> index 2cdf844..9b63ac5 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>>>>>>>>>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>>>>>>>>>>>>             .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>>>>>>>>>>>>             .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>>>>>>>>>>>>             .gem_prime_export = amdgpu_gem_prime_export,
>>>>>>>>>>>>> -    .gem_prime_import = drm_gem_prime_import,
>>>>>>>>>>>>> +    .gem_prime_import = amdgpu_gem_prime_import,
>>>>>>>>>>>>>             .gem_prime_pin = amdgpu_gem_prime_pin,
>>>>>>>>>>>>>             .gem_prime_unpin = amdgpu_gem_prime_unpin,
>>>>>>>>>>>>>             .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
>> @@
>>>>>>>>>>>>> -843,6
>>>>>>>>>>>>> +843,7 @@ static struct drm_driver kms_driver = {
>>>>>>>>>>>>>             .gem_prime_import_sg_table =
>>>>>>>>>> amdgpu_gem_prime_import_sg_table,
>>>>>>>>>>>>>             .gem_prime_vmap = amdgpu_gem_prime_vmap,
>>>>>>>>>>>>>             .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>>>>>>>>>>>>> +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
>>>>>>>>>>>>>               .name = DRIVER_NAME,
>>>>>>>>>>>>>             .desc = DRIVER_DESC, diff --git
>>>>>>>>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>>>> index 5b3f928..13c977a 100644
>>>>>>>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>>>>>>>>>>>>> @@ -57,6 +57,40 @@ void
>> amdgpu_gem_prime_vunmap(struct
>>>>>>>>>> drm_gem_object *obj, void *vaddr)
>>>>>>>>>>>>>             ttm_bo_kunmap(&bo->dma_buf_vmap);
>>>>>>>>>>>>>         }
>>>>>>>>>>>>>         +int amdgpu_gem_prime_mmap(struct drm_gem_object
>>>> *obj,
>>>>>>>> struct
>>>>>>>>>>>>> vm_area_struct *vma)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>>>>>>>>>>>> +    struct amdgpu_device *adev = amdgpu_ttm_adev(bo-
>>>>>>> tbo.bdev);
>>>>>>>>>>>>> +    unsigned asize = amdgpu_bo_size(bo);
>>>>>>>>>>>>> +    int ret;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    if (!vma->vm_file)
>>>>>>>>>>>>> +        return -ENODEV;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    if (adev == NULL)
>>>>>>>>>>>>> +        return -ENODEV;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    /* Check for valid size. */
>>>>>>>>>>>>> +    if (asize < vma->vm_end - vma->vm_start)
>>>>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>>>>>>>>>>>>> +        (bo->flags &
>> AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>>>>>>>>>>>>> +        return -EPERM;
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>> +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
>>>>>> PAGE_SHIFT;
>>>>>>>>>>>> Maybe better use "vma->vm_pgoff +=
>>>>>>>> amdgpu_bo_mmap_offset(bo) >>
>>>>>>>>>> PAGE_SHIFT;", but I'm not sure.
>>>>>>>>>>>> How other drivers handle this?
>>>>>>>>>>>>
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    /* prime mmap does not need to check access, so allow
>>>>>>>>>>>>> + here
>>>> */
>>>>>>>>>>>>> +    ret = drm_vma_node_allow(&obj->vma_node, vma-
>>>>> vm_file-
>>>>>>>>>>> private_data);
>>>>>>>>>>>>> +    if (ret)
>>>>>>>>>>>>> +        return ret;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    ret = ttm_bo_mmap(vma->vm_file, vma, &adev-
>>>>> mman.bdev);
>>>>>>>>>>>>> +    drm_vma_node_revoke(&obj->vma_node,
>>>>>>>>>>>>> + vma->vm_file->private_data);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    return ret;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>>         struct drm_gem_object *
>>>>>>>>>>>>>         amdgpu_gem_prime_import_sg_table(struct drm_device
>>>> *dev,
>>>>>>>>>>>>>                          struct dma_buf_attachment *attach,
>>>>>>>>>>>>> @@
>>>>>>>>>>>>> -130,14
>>>>>>>>>>>>> +164,55 @@ struct reservation_object
>>>>>>>>>> *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>>>>>>>>>>>>             return bo->tbo.resv;
>>>>>>>>>>>>>         }
>>>>>>>>>>>>>         +static int amdgpu_gem_begin_cpu_access(struct
>>>>>>>>>>>>> dma_buf *dma_buf, enum dma_data_direction direction)
>>>>>>>>>>>>> +{
>>>>>>>>>>>>> +    return amdgpu_gem_prime_pin(dma_buf->priv);
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +static int amdgpu_gem_end_cpu_access(struct dma_buf
>>>>>> *dma_buf,
>>>>>>>>>> enum
>>>>>>>>>>>>> +dma_data_direction direction) {
>>>>>>>>>>>>> +    amdgpu_gem_prime_unpin(dma_buf->priv);
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    return 0;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +static struct dma_buf_ops amdgpu_dmabuf_ops;
>>>>>>>>>>>>> +
>>>>>>>>>>>>>         struct dma_buf *amdgpu_gem_prime_export(struct
>>>>>> drm_device
>>>>>>>> *dev,
>>>>>>>>>>>>>                             struct drm_gem_object *gobj,
>>>>>>>>>>>>>                             int flags)
>>>>>>>>>>>>>         {
>>>>>>>>>>>>>             struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
>>>>>>>>>>>>> +    struct dma_buf *dma_buf;
>>>>>>>>>>>>>               if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
>>>>>>>>>>>>>                 return ERR_PTR(-EPERM);
>>>>>>>>>>>>>         -    return drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>>>>>> +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
>>>>>>>>>>>>> +    amdgpu_dmabuf_ops = *(dma_buf->ops);
>>>>>>>>>>>>> +    amdgpu_dmabuf_ops.begin_cpu_access =
>>>>>>>>>> amdgpu_gem_begin_cpu_access;
>>>>>>>>>>>>> +    amdgpu_dmabuf_ops.end_cpu_access =
>>>>>>>>>> amdgpu_gem_end_cpu_access;
>>>>>>>>>>>>> +    dma_buf->ops = &amdgpu_dmabuf_ops;
>>>>>>>>>>>> This isn't race free and needs to be fixed.
>>>>>>>>>>>>
>>>>>>>>>>>> Better add callbacks to drm_prime.c similar to
>>>>>>>>>> drm_gem_dmabuf_mmap().
>>>>>>>>>>>> Alternative you could just completely drop
>>>>>>>>>> amdgpu_gem_begin_cpu_access() and
>>>>>> amdgpu_gem_end_cpu_access()
>>>>>>>> as
>>>>>>>>>> well.
>>>>>>>>>>>> When the buffer is shared between device it is pinned anyway
>>>>>>>>>>>> and when
>>>>>>>>>> it isn't shared ttm_bo_mmap() is able to handle VRAM mappings
>>>>>>>>>> as
>>>>>> well.
>>>>>>>>>>>> Regards,
>>>>>>>>>>>> Christian.
>>>>>>>>>>>>
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    return dma_buf;
>>>>>>>>>>>>> +}
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +struct drm_gem_object
>> *amdgpu_gem_prime_import(struct
>>>>>>>>>> drm_device *dev,
>>>>>>>>>>>>> +                        struct dma_buf *dma_buf) {
>>>>>>>>>>>>> +    struct drm_gem_object *obj;
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
>>>>>>>>>>>>> +        obj = dma_buf->priv;
>>>>>>>>>>>>> +        if (obj->dev == dev) {
>>>>>>>>>>>>> +            /*
>>>>>>>>>>>>> +             * Importing dmabuf exported from our own gem
>>>> increases
>>>>>>>>>>>>> +             * refcount on gem itself instead of f_count of dmabuf.
>>>>>>>>>>>>> +             */
>>>>>>>>>>>>> +            drm_gem_object_get(obj);
>>>>>>>>>>>>> +            return obj;
>>>>>>>>>>>>> +        }
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    return drm_gem_prime_import(dev, dma_buf);
>>>>>>>>>>>>>         }
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2017-09-21 14:34 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-09-13 22:39 [PATCH v2 1/1] drm/amdgpu: Add gem_prime_mmap support Samuel Li
     [not found] ` <1505342358-21512-1-git-send-email-Samuel.Li-5C7GfCeVMHo@public.gmane.org>
2017-09-15 15:05   ` Christian König
     [not found]     ` <f68eaecb-ee43-78ba-2609-2ff6bb53ac50-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-09-19 21:22       ` Samuel Li
     [not found]         ` <b1064525-5b36-1c12-4da6-aa4a805caaec-5C7GfCeVMHo@public.gmane.org>
2017-09-20  6:57           ` Christian König
     [not found]             ` <f0a22ddb-d747-6d93-adf3-428fd10f0ab5-5C7GfCeVMHo@public.gmane.org>
2017-09-20 15:44               ` Li, Samuel
     [not found]                 ` <BLUPR12MB06283E58EC4B6434D71DCF50F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-09-20 16:20                   ` Christian König
     [not found]                     ` <ddf0fe6c-4de7-ae03-a61f-30e3cc1cf315-5C7GfCeVMHo@public.gmane.org>
2017-09-20 17:34                       ` Li, Samuel
     [not found]                         ` <BLUPR12MB06286EB4DD86B34C84F124EAF5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-09-20 17:38                           ` Christian König
     [not found]                             ` <70db5b39-546e-ac15-635e-dad73420e8bc-5C7GfCeVMHo@public.gmane.org>
2017-09-20 17:47                               ` Li, Samuel
     [not found]                                 ` <BLUPR12MB0628DCFA21305B174B919D9EF5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-09-20 18:15                                   ` Christian König
     [not found]                                     ` <085926a6-90fc-2cf7-f05e-e762fa401fe8-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-09-20 20:31                                       ` Li, Samuel
     [not found]                                         ` <BLUPR12MB0628E9219A8C44D7220AA5F8F5610-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-09-21  7:39                                           ` Christian König
     [not found]                                             ` <ca08675a-7bb0-5e44-bb1f-dab3a0f14802-5C7GfCeVMHo@public.gmane.org>
2017-09-21 14:31                                               ` Li, Samuel
     [not found]                                                 ` <BLUPR12MB0628E666BD4B7BD7483B9F63F5660-7LeqcoF/hwrL9O90+oShTwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-09-21 14:34                                                   ` Christian König
2017-09-21  6:25                   ` Deucher, Alexander

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.