All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
@ 2018-07-11 20:57 Andrey Grodzovsky
       [not found] ` <1531342678-11142-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Andrey Grodzovsky @ 2018-07-11 20:57 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andrey Grodzovsky, Marek.Olsak-5C7GfCeVMHo, christian.koenig-5C7GfCeVMHo

This change is to support a MESA performance optimization.
Modify the CS IOCTL to allow its input as a command buffer and an array of
buffer handles, to create a temporary bo list and then destroy it
when the IOCTL completes.
This saves calling the BO_LIST create and destroy IOCTLs in MESA
and thereby improves performance.

v2: Avoid inserting the temp list into idr struct.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
 include/uapi/drm/amdgpu_drm.h               |  1 +
 5 files changed, 114 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8eaba0f..9b472b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 			     struct list_head *validated);
 void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
 void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+				      struct drm_amdgpu_bo_list_entry **info_param);
+
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
+				 struct drm_file *filp,
+				 struct drm_amdgpu_bo_list_entry *info,
+				 unsigned num_entries,
+				 int *id,
+				 struct amdgpu_bo_list **list);
+
+void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
 
 /*
  * GFX stuff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 92be7f6..14c7c59 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
 	kfree_rcu(list, rhead);
 }
 
-static int amdgpu_bo_list_create(struct amdgpu_device *adev,
+int amdgpu_bo_list_create(struct amdgpu_device *adev,
 				 struct drm_file *filp,
 				 struct drm_amdgpu_bo_list_entry *info,
 				 unsigned num_entries,
-				 int *id)
+				 int *id,
+				 struct amdgpu_bo_list **list_out)
 {
 	int r;
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
@@ -78,20 +79,25 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
 		return r;
 	}
 
+	if (id) {
 	/* idr alloc should be called only after initialization of bo list. */
-	mutex_lock(&fpriv->bo_list_lock);
-	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
-	mutex_unlock(&fpriv->bo_list_lock);
-	if (r < 0) {
-		amdgpu_bo_list_free(list);
-		return r;
+		mutex_lock(&fpriv->bo_list_lock);
+		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
+		mutex_unlock(&fpriv->bo_list_lock);
+		if (r < 0) {
+			amdgpu_bo_list_free(list);
+			return r;
+		}
+		*id = r;
 	}
-	*id = r;
+
+	if (list_out)
+		*list_out = list;
 
 	return 0;
 }
 
-static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
+void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 {
 	struct amdgpu_bo_list *list;
 
@@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
 	kfree(list);
 }
 
-int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *filp)
+int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
+				      struct drm_amdgpu_bo_list_entry **info_param)
 {
-	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
-
-	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	union drm_amdgpu_bo_list *args = data;
-	uint32_t handle = args->in.list_handle;
-	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
-
 	struct drm_amdgpu_bo_list_entry *info;
-	struct amdgpu_bo_list *list;
-
 	int r;
+	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
+	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
 
-	info = kvmalloc_array(args->in.bo_number,
+	info = kvmalloc_array(in->bo_number,
 			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 
 	/* copy the handle array from userspace to a kernel buffer */
 	r = -EFAULT;
-	if (likely(info_size == args->in.bo_info_size)) {
-		unsigned long bytes = args->in.bo_number *
-			args->in.bo_info_size;
+	if (likely(info_size == in->bo_info_size)) {
+		unsigned long bytes = in->bo_number *
+			in->bo_info_size;
 
 		if (copy_from_user(info, uptr, bytes))
 			goto error_free;
 
 	} else {
-		unsigned long bytes = min(args->in.bo_info_size, info_size);
+		unsigned long bytes = min(in->bo_info_size, info_size);
 		unsigned i;
 
-		memset(info, 0, args->in.bo_number * info_size);
-		for (i = 0; i < args->in.bo_number; ++i) {
+		memset(info, 0, in->bo_number * info_size);
+		for (i = 0; i < in->bo_number; ++i) {
 			if (copy_from_user(&info[i], uptr, bytes))
 				goto error_free;
 
-			uptr += args->in.bo_info_size;
+			uptr += in->bo_info_size;
 		}
 	}
 
+	*info_param = info;
+	return 0;
+
+error_free:
+	kvfree(info);
+	return r;
+}
+
+int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *filp)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	union drm_amdgpu_bo_list *args = data;
+	uint32_t handle = args->in.list_handle;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+	struct amdgpu_bo_list *list;
+	int r;
+
+	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
+	if (r)
+		goto error_free;
+
 	switch (args->in.operation) {
 	case AMDGPU_BO_LIST_OP_CREATE:
 		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
-					  &handle);
+					  &handle, NULL);
 		if (r)
 			goto error_free;
 		break;
@@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 	return 0;
 
 error_free:
-	kvfree(info);
+	if (info)
+		kvfree(info);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9881a1e..30026b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	return 0;
 }
 
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
+static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
+				      struct drm_amdgpu_bo_list_in *data)
+{
+	int r;
+	struct drm_amdgpu_bo_list_entry *info = NULL;
+
+	r = amdgpu_bo_create_list_entry_array(data, &info);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
+	if (r)
+		goto error_free;
+
+	kvfree(info);
+	return 0;
+
+error_free:
+	if (info)
+		kvfree(info);
+
+	return r;
+}
+
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
-	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
 	unsigned size, num_ibs = 0;
@@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 
 			break;
 
+		case AMDGPU_CHUNK_ID_BO_HANDLES:
+			size = sizeof(struct drm_amdgpu_bo_list_in);
+			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+				ret = -EINVAL;
+				goto free_partial_kdata;
+			}
+
+			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+			if (ret)
+				goto free_partial_kdata;
+
+			break;
+
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	INIT_LIST_HEAD(&p->validated);
 
-	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
+	if (!p->bo_list)
+		p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+	else
+		mutex_lock(&p->bo_list->lock);
+
 	if (p->bo_list) {
 		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 		if (p->bo_list->first_userptr != p->bo_list->num_entries)
@@ -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
  * used by parsing context.
  **/
 static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
-				  bool backoff)
+				  bool backoff, int id)
 {
 	unsigned i;
 
@@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	r = amdgpu_cs_submit(&parser, cs);
 
 out:
-	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+	amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 06aede1..529500c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,9 +69,10 @@
  * - 3.24.0 - Add high priority compute support for gfx9
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
+ * - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	26
+#define KMS_DRIVER_MINOR	27
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 09741ba..94444ee 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found] ` <1531342678-11142-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2018-07-12  2:31   ` zhoucm1
       [not found]     ` <9060133a-b04f-0c59-e806-1c0d407bb56c-5C7GfCeVMHo@public.gmane.org>
  2018-07-12  7:09   ` Christian König
  1 sibling, 1 reply; 8+ messages in thread
From: zhoucm1 @ 2018-07-12  2:31 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Marek.Olsak-5C7GfCeVMHo, christian.koenig-5C7GfCeVMHo



On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
> This change is to support MESA performace optimization.
> Modify CS IOCTL to allow its input as command buffer and an array of
> buffer handles to create a temporay bo list and then destroy it
> when IOCTL completes.
> This saves on calling for BO_LIST create and destry IOCTLs in MESA
> and by this improves performance.
>
> v2: Avoid inserting the temp list into idr struct.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>   include/uapi/drm/amdgpu_drm.h               |  1 +
>   5 files changed, 114 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8eaba0f..9b472b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>   			     struct list_head *validated);
>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param);
> +
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +				 struct drm_file *filp,
> +				 struct drm_amdgpu_bo_list_entry *info,
> +				 unsigned num_entries,
> +				 int *id,
> +				 struct amdgpu_bo_list **list);
> +
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>   
>   /*
>    * GFX stuff
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 92be7f6..14c7c59 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
>   	kfree_rcu(list, rhead);
>   }
>   
> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   				 struct drm_file *filp,
>   				 struct drm_amdgpu_bo_list_entry *info,
>   				 unsigned num_entries,
> -				 int *id)
> +				 int *id,
> +				 struct amdgpu_bo_list **list_out)
>   {
>   	int r;
>   	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> @@ -78,20 +79,25 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   		return r;
>   	}
>   
> +	if (id) {
>   	/* idr alloc should be called only after initialization of bo list. */
> -	mutex_lock(&fpriv->bo_list_lock);
> -	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
> -	mutex_unlock(&fpriv->bo_list_lock);
> -	if (r < 0) {
> -		amdgpu_bo_list_free(list);
> -		return r;
> +		mutex_lock(&fpriv->bo_list_lock);
> +		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
> +		mutex_unlock(&fpriv->bo_list_lock);
> +		if (r < 0) {
> +			amdgpu_bo_list_free(list);
> +			return r;
> +		}
> +		*id = r;
>   	}
> -	*id = r;
> +
> +	if (list_out)
> +		*list_out = list;
>   
>   	return 0;
>   }
>   
> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>   {
>   	struct amdgpu_bo_list *list;
>   
> @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>   	kfree(list);
>   }
>   
> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> -				struct drm_file *filp)
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param)
>   {
> -	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
> -
> -	struct amdgpu_device *adev = dev->dev_private;
> -	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> -	union drm_amdgpu_bo_list *args = data;
> -	uint32_t handle = args->in.list_handle;
> -	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
> -
>   	struct drm_amdgpu_bo_list_entry *info;
> -	struct amdgpu_bo_list *list;
> -
>   	int r;
> +	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
> +	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>   
> -	info = kvmalloc_array(args->in.bo_number,
> +	info = kvmalloc_array(in->bo_number,
>   			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
>   	if (!info)
>   		return -ENOMEM;
>   
>   	/* copy the handle array from userspace to a kernel buffer */
>   	r = -EFAULT;
> -	if (likely(info_size == args->in.bo_info_size)) {
> -		unsigned long bytes = args->in.bo_number *
> -			args->in.bo_info_size;
> +	if (likely(info_size == in->bo_info_size)) {
> +		unsigned long bytes = in->bo_number *
> +			in->bo_info_size;
>   
>   		if (copy_from_user(info, uptr, bytes))
>   			goto error_free;
>   
>   	} else {
> -		unsigned long bytes = min(args->in.bo_info_size, info_size);
> +		unsigned long bytes = min(in->bo_info_size, info_size);
>   		unsigned i;
>   
> -		memset(info, 0, args->in.bo_number * info_size);
> -		for (i = 0; i < args->in.bo_number; ++i) {
> +		memset(info, 0, in->bo_number * info_size);
> +		for (i = 0; i < in->bo_number; ++i) {
>   			if (copy_from_user(&info[i], uptr, bytes))
>   				goto error_free;
>   
> -			uptr += args->in.bo_info_size;
> +			uptr += in->bo_info_size;
>   		}
>   	}
>   
> +	*info_param = info;
> +	return 0;
> +
> +error_free:
> +	kvfree(info);
> +	return r;
> +}
> +
> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> +				struct drm_file *filp)
> +{
> +	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	union drm_amdgpu_bo_list *args = data;
> +	uint32_t handle = args->in.list_handle;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +	struct amdgpu_bo_list *list;
> +	int r;
> +
> +	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
> +	if (r)
> +		goto error_free;
> +
>   	switch (args->in.operation) {
>   	case AMDGPU_BO_LIST_OP_CREATE:
>   		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
> -					  &handle);
> +					  &handle, NULL);
>   		if (r)
>   			goto error_free;
>   		break;
> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   
>   error_free:
> -	kvfree(info);
> +	if (info)
> +		kvfree(info);
>   	return r;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9881a1e..30026b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>   	return 0;
>   }
>   
> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
> +				      struct drm_amdgpu_bo_list_in *data)
> +{
> +	int r;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +
> +	r = amdgpu_bo_create_list_entry_array(data, &info);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
> +	if (r)
> +		goto error_free;
> +
> +	kvfree(info);
> +	return 0;
> +
> +error_free:
> +	if (info)
> +		kvfree(info);
> +
> +	return r;
> +}
> +
> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
>   {
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
> -	union drm_amdgpu_cs *cs = data;
>   	uint64_t *chunk_array_user;
>   	uint64_t *chunk_array;
>   	unsigned size, num_ibs = 0;
> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   
>   			break;
>   
> +		case AMDGPU_CHUNK_ID_BO_HANDLES:
> +			size = sizeof(struct drm_amdgpu_bo_list_in);
> +			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
> +				ret = -EINVAL;
> +				goto free_partial_kdata;
> +			}
> +
> +			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
> +			if (ret)
> +				goto free_partial_kdata;
> +
> +			break;
> +
>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   
>   	INIT_LIST_HEAD(&p->validated);
>   
> -	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
> +	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
> +	if (!p->bo_list)
> +		p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
> +	else
> +		mutex_lock(&p->bo_list->lock);
> +
>   	if (p->bo_list) {
>   		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>   		if (p->bo_list->first_userptr != p->bo_list->num_entries)
> @@ -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>    * used by parsing context.
>    **/
>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> -				  bool backoff)
> +				  bool backoff, int id)
Don't need it after you get bo_list without idr.

>   {
>   	unsigned i;
>   
> @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	r = amdgpu_cs_submit(&parser, cs);
>   
>   out:
> -	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +	amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle);
Don't need it after you get bo_list without idr.

Otherwise it looks really good to me, Reviewed-by: Chunming Zhou 
<david1.zhou@amd.com>

>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 06aede1..529500c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -69,9 +69,10 @@
>    * - 3.24.0 - Add high priority compute support for gfx9
>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>    */
>   #define KMS_DRIVER_MAJOR	3
> -#define KMS_DRIVER_MINOR	26
> +#define KMS_DRIVER_MINOR	27
>   #define KMS_DRIVER_PATCHLEVEL	0
>   
>   int amdgpu_vram_limit = 0;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 09741ba..94444ee 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>   
>   struct drm_amdgpu_cs_chunk {
>   	__u32		chunk_id;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* RE: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found]     ` <9060133a-b04f-0c59-e806-1c0d407bb56c-5C7GfCeVMHo@public.gmane.org>
@ 2018-07-12  3:09       ` Zhou, David(ChunMing)
       [not found]         ` <BY1PR12MB050202D0A4883EDB709D434EB4590-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Zhou, David(ChunMing) @ 2018-07-12  3:09 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Olsak, Marek, Koenig, Christian

Hi Andrey,

Could you add compatibility flag or increase kms driver version? So that user space can keep old path when using new one.

Regards,
David Zhou

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of zhoucm1
Sent: Thursday, July 12, 2018 10:31 AM
To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Olsak, Marek <Marek.Olsak@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2



On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
> This change is to support MESA performace optimization.
> Modify CS IOCTL to allow its input as command buffer and an array of 
> buffer handles to create a temporay bo list and then destroy it when 
> IOCTL completes.
> This saves on calling for BO_LIST create and destry IOCTLs in MESA and 
> by this improves performance.
>
> v2: Avoid inserting the temp list into idr struct.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>   include/uapi/drm/amdgpu_drm.h               |  1 +
>   5 files changed, 114 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8eaba0f..9b472b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>   			     struct list_head *validated);
>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param);
> +
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +				 struct drm_file *filp,
> +				 struct drm_amdgpu_bo_list_entry *info,
> +				 unsigned num_entries,
> +				 int *id,
> +				 struct amdgpu_bo_list **list);
> +
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>   
>   /*
>    * GFX stuff
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 92be7f6..14c7c59 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
>   	kfree_rcu(list, rhead);
>   }
>   
> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   				 struct drm_file *filp,
>   				 struct drm_amdgpu_bo_list_entry *info,
>   				 unsigned num_entries,
> -				 int *id)
> +				 int *id,
> +				 struct amdgpu_bo_list **list_out)
>   {
>   	int r;
>   	struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 +79,25 @@ 
> static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   		return r;
>   	}
>   
> +	if (id) {
>   	/* idr alloc should be called only after initialization of bo list. */
> -	mutex_lock(&fpriv->bo_list_lock);
> -	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
> -	mutex_unlock(&fpriv->bo_list_lock);
> -	if (r < 0) {
> -		amdgpu_bo_list_free(list);
> -		return r;
> +		mutex_lock(&fpriv->bo_list_lock);
> +		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
> +		mutex_unlock(&fpriv->bo_list_lock);
> +		if (r < 0) {
> +			amdgpu_bo_list_free(list);
> +			return r;
> +		}
> +		*id = r;
>   	}
> -	*id = r;
> +
> +	if (list_out)
> +		*list_out = list;
>   
>   	return 0;
>   }
>   
> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int 
> id)
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>   {
>   	struct amdgpu_bo_list *list;
>   
> @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>   	kfree(list);
>   }
>   
> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> -				struct drm_file *filp)
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param)
>   {
> -	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
> -
> -	struct amdgpu_device *adev = dev->dev_private;
> -	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> -	union drm_amdgpu_bo_list *args = data;
> -	uint32_t handle = args->in.list_handle;
> -	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
> -
>   	struct drm_amdgpu_bo_list_entry *info;
> -	struct amdgpu_bo_list *list;
> -
>   	int r;
> +	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
> +	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>   
> -	info = kvmalloc_array(args->in.bo_number,
> +	info = kvmalloc_array(in->bo_number,
>   			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
>   	if (!info)
>   		return -ENOMEM;
>   
>   	/* copy the handle array from userspace to a kernel buffer */
>   	r = -EFAULT;
> -	if (likely(info_size == args->in.bo_info_size)) {
> -		unsigned long bytes = args->in.bo_number *
> -			args->in.bo_info_size;
> +	if (likely(info_size == in->bo_info_size)) {
> +		unsigned long bytes = in->bo_number *
> +			in->bo_info_size;
>   
>   		if (copy_from_user(info, uptr, bytes))
>   			goto error_free;
>   
>   	} else {
> -		unsigned long bytes = min(args->in.bo_info_size, info_size);
> +		unsigned long bytes = min(in->bo_info_size, info_size);
>   		unsigned i;
>   
> -		memset(info, 0, args->in.bo_number * info_size);
> -		for (i = 0; i < args->in.bo_number; ++i) {
> +		memset(info, 0, in->bo_number * info_size);
> +		for (i = 0; i < in->bo_number; ++i) {
>   			if (copy_from_user(&info[i], uptr, bytes))
>   				goto error_free;
>   
> -			uptr += args->in.bo_info_size;
> +			uptr += in->bo_info_size;
>   		}
>   	}
>   
> +	*info_param = info;
> +	return 0;
> +
> +error_free:
> +	kvfree(info);
> +	return r;
> +}
> +
> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> +				struct drm_file *filp)
> +{
> +	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	union drm_amdgpu_bo_list *args = data;
> +	uint32_t handle = args->in.list_handle;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +	struct amdgpu_bo_list *list;
> +	int r;
> +
> +	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
> +	if (r)
> +		goto error_free;
> +
>   	switch (args->in.operation) {
>   	case AMDGPU_BO_LIST_OP_CREATE:
>   		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
> -					  &handle);
> +					  &handle, NULL);
>   		if (r)
>   			goto error_free;
>   		break;
> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   
>   error_free:
> -	kvfree(info);
> +	if (info)
> +		kvfree(info);
>   	return r;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9881a1e..30026b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>   	return 0;
>   }
>   
> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void 
> *data)
> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
> +				      struct drm_amdgpu_bo_list_in *data) {
> +	int r;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +
> +	r = amdgpu_bo_create_list_entry_array(data, &info);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
> +	if (r)
> +		goto error_free;
> +
> +	kvfree(info);
> +	return 0;
> +
> +error_free:
> +	if (info)
> +		kvfree(info);
> +
> +	return r;
> +}
> +
> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union 
> +drm_amdgpu_cs *cs)
>   {
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
> -	union drm_amdgpu_cs *cs = data;
>   	uint64_t *chunk_array_user;
>   	uint64_t *chunk_array;
>   	unsigned size, num_ibs = 0;
> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct 
> amdgpu_cs_parser *p, void *data)
>   
>   			break;
>   
> +		case AMDGPU_CHUNK_ID_BO_HANDLES:
> +			size = sizeof(struct drm_amdgpu_bo_list_in);
> +			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
> +				ret = -EINVAL;
> +				goto free_partial_kdata;
> +			}
> +
> +			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
> +			if (ret)
> +				goto free_partial_kdata;
> +
> +			break;
> +
>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct 
> amdgpu_cs_parser *p,
>   
>   	INIT_LIST_HEAD(&p->validated);
>   
> -	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
> +	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
> +	if (!p->bo_list)
> +		p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
> +	else
> +		mutex_lock(&p->bo_list->lock);
> +
>   	if (p->bo_list) {
>   		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>   		if (p->bo_list->first_userptr != p->bo_list->num_entries) @@ 
> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>    * used by parsing context.
>    **/
>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> -				  bool backoff)
> +				  bool backoff, int id)
Don't need it after you get bo_list without idr.

>   {
>   	unsigned i;
>   
> @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	r = amdgpu_cs_submit(&parser, cs);
>   
>   out:
> -	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +	amdgpu_cs_parser_fini(&parser, r, reserved_buffers, 
> +cs->in.bo_list_handle);
Don't need it after you get bo_list without idr.

Otherwise it looks really good to me, Reviewed-by: Chunming Zhou <david1.zhou@amd.com>

>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 06aede1..529500c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -69,9 +69,10 @@
>    * - 3.24.0 - Add high priority compute support for gfx9
>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>    */
>   #define KMS_DRIVER_MAJOR	3
> -#define KMS_DRIVER_MINOR	26
> +#define KMS_DRIVER_MINOR	27
>   #define KMS_DRIVER_PATCHLEVEL	0
>   
>   int amdgpu_vram_limit = 0;
> diff --git a/include/uapi/drm/amdgpu_drm.h 
> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>   
>   struct drm_amdgpu_cs_chunk {
>   	__u32		chunk_id;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found]         ` <BY1PR12MB050202D0A4883EDB709D434EB4590-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-07-12  4:02           ` zhoucm1
       [not found]             ` <a43a5f3d-7fef-1721-307e-9135cf494264-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: zhoucm1 @ 2018-07-12  4:02 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Olsak, Marek, Koenig, Christian



On 2018年07月12日 11:09, Zhou, David(ChunMing) wrote:
> Hi Andrey,
>
> Could you add compatibility flag or increase kms driver version? So that user space can keep old path when using new one.
Sorry for the noise — it's already at the end of the patch.

Regards,
David Zhou
>
> Regards,
> David Zhou
>
> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of zhoucm1
> Sent: Thursday, July 12, 2018 10:31 AM
> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Olsak, Marek <Marek.Olsak@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
>
>
>
> On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
>> This change is to support MESA performace optimization.
>> Modify CS IOCTL to allow its input as command buffer and an array of
>> buffer handles to create a temporay bo list and then destroy it when
>> IOCTL completes.
>> This saves on calling for BO_LIST create and destry IOCTLs in MESA and
>> by this improves performance.
>>
>> v2: Avoid inserting the temp list into idr struct.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>>    include/uapi/drm/amdgpu_drm.h               |  1 +
>>    5 files changed, 114 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 8eaba0f..9b472b2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>>    			     struct list_head *validated);
>>    void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>    void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> +				      struct drm_amdgpu_bo_list_entry **info_param);
>> +
>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> +				 struct drm_file *filp,
>> +				 struct drm_amdgpu_bo_list_entry *info,
>> +				 unsigned num_entries,
>> +				 int *id,
>> +				 struct amdgpu_bo_list **list);
>> +
>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>>    
>>    /*
>>     * GFX stuff
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index 92be7f6..14c7c59 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
>>    	kfree_rcu(list, rhead);
>>    }
>>    
>> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>    				 struct drm_file *filp,
>>    				 struct drm_amdgpu_bo_list_entry *info,
>>    				 unsigned num_entries,
>> -				 int *id)
>> +				 int *id,
>> +				 struct amdgpu_bo_list **list_out)
>>    {
>>    	int r;
>>    	struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 +79,25 @@
>> static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>    		return r;
>>    	}
>>    
>> +	if (id) {
>>    	/* idr alloc should be called only after initialization of bo list. */
>> -	mutex_lock(&fpriv->bo_list_lock);
>> -	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>> -	mutex_unlock(&fpriv->bo_list_lock);
>> -	if (r < 0) {
>> -		amdgpu_bo_list_free(list);
>> -		return r;
>> +		mutex_lock(&fpriv->bo_list_lock);
>> +		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>> +		mutex_unlock(&fpriv->bo_list_lock);
>> +		if (r < 0) {
>> +			amdgpu_bo_list_free(list);
>> +			return r;
>> +		}
>> +		*id = r;
>>    	}
>> -	*id = r;
>> +
>> +	if (list_out)
>> +		*list_out = list;
>>    
>>    	return 0;
>>    }
>>    
>> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int
>> id)
>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>    {
>>    	struct amdgpu_bo_list *list;
>>    
>> @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>>    	kfree(list);
>>    }
>>    
>> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>> -				struct drm_file *filp)
>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
>> +				      struct drm_amdgpu_bo_list_entry **info_param)
>>    {
>> -	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>> -
>> -	struct amdgpu_device *adev = dev->dev_private;
>> -	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> -	union drm_amdgpu_bo_list *args = data;
>> -	uint32_t handle = args->in.list_handle;
>> -	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>> -
>>    	struct drm_amdgpu_bo_list_entry *info;
>> -	struct amdgpu_bo_list *list;
>> -
>>    	int r;
>> +	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>> +	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
>>    
>> -	info = kvmalloc_array(args->in.bo_number,
>> +	info = kvmalloc_array(in->bo_number,
>>    			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
>>    	if (!info)
>>    		return -ENOMEM;
>>    
>>    	/* copy the handle array from userspace to a kernel buffer */
>>    	r = -EFAULT;
>> -	if (likely(info_size == args->in.bo_info_size)) {
>> -		unsigned long bytes = args->in.bo_number *
>> -			args->in.bo_info_size;
>> +	if (likely(info_size == in->bo_info_size)) {
>> +		unsigned long bytes = in->bo_number *
>> +			in->bo_info_size;
>>    
>>    		if (copy_from_user(info, uptr, bytes))
>>    			goto error_free;
>>    
>>    	} else {
>> -		unsigned long bytes = min(args->in.bo_info_size, info_size);
>> +		unsigned long bytes = min(in->bo_info_size, info_size);
>>    		unsigned i;
>>    
>> -		memset(info, 0, args->in.bo_number * info_size);
>> -		for (i = 0; i < args->in.bo_number; ++i) {
>> +		memset(info, 0, in->bo_number * info_size);
>> +		for (i = 0; i < in->bo_number; ++i) {
>>    			if (copy_from_user(&info[i], uptr, bytes))
>>    				goto error_free;
>>    
>> -			uptr += args->in.bo_info_size;
>> +			uptr += in->bo_info_size;
>>    		}
>>    	}
>>    
>> +	*info_param = info;
>> +	return 0;
>> +
>> +error_free:
>> +	kvfree(info);
>> +	return r;
>> +}
>> +
>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>> +				struct drm_file *filp)
>> +{
>> +	struct amdgpu_device *adev = dev->dev_private;
>> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	union drm_amdgpu_bo_list *args = data;
>> +	uint32_t handle = args->in.list_handle;
>> +	struct drm_amdgpu_bo_list_entry *info = NULL;
>> +	struct amdgpu_bo_list *list;
>> +	int r;
>> +
>> +	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>> +	if (r)
>> +		goto error_free;
>> +
>>    	switch (args->in.operation) {
>>    	case AMDGPU_BO_LIST_OP_CREATE:
>>    		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
>> -					  &handle);
>> +					  &handle, NULL);
>>    		if (r)
>>    			goto error_free;
>>    		break;
>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>    	return 0;
>>    
>>    error_free:
>> -	kvfree(info);
>> +	if (info)
>> +		kvfree(info);
>>    	return r;
>>    }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9881a1e..30026b8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>>    	return 0;
>>    }
>>    
>> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void
>> *data)
>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>> +				      struct drm_amdgpu_bo_list_in *data) {
>> +	int r;
>> +	struct drm_amdgpu_bo_list_entry *info = NULL;
>> +
>> +	r = amdgpu_bo_create_list_entry_array(data, &info);
>> +	if (r)
>> +		return r;
>> +
>> +	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
>> +	if (r)
>> +		goto error_free;
>> +
>> +	kvfree(info);
>> +	return 0;
>> +
>> +error_free:
>> +	if (info)
>> +		kvfree(info);
>> +
>> +	return r;
>> +}
>> +
>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
>> +drm_amdgpu_cs *cs)
>>    {
>>    	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>    	struct amdgpu_vm *vm = &fpriv->vm;
>> -	union drm_amdgpu_cs *cs = data;
>>    	uint64_t *chunk_array_user;
>>    	uint64_t *chunk_array;
>>    	unsigned size, num_ibs = 0;
>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct
>> amdgpu_cs_parser *p, void *data)
>>    
>>    			break;
>>    
>> +		case AMDGPU_CHUNK_ID_BO_HANDLES:
>> +			size = sizeof(struct drm_amdgpu_bo_list_in);
>> +			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>> +				ret = -EINVAL;
>> +				goto free_partial_kdata;
>> +			}
>> +
>> +			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
>> +			if (ret)
>> +				goto free_partial_kdata;
>> +
>> +			break;
>> +
>>    		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>    		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>    		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct
>> amdgpu_cs_parser *p,
>>    
>>    	INIT_LIST_HEAD(&p->validated);
>>    
>> -	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>> +	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
>> +	if (!p->bo_list)
>> +		p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>> +	else
>> +		mutex_lock(&p->bo_list->lock);
>> +
>>    	if (p->bo_list) {
>>    		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>>    		if (p->bo_list->first_userptr != p->bo_list->num_entries) @@
>> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>>     * used by parsing context.
>>     **/
>>    static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>> -				  bool backoff)
>> +				  bool backoff, int id)
> Don't need it after you get bo_list without idr.
>
>>    {
>>    	unsigned i;
>>    
>> @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>    	r = amdgpu_cs_submit(&parser, cs);
>>    
>>    out:
>> -	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>> +	amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
>> +cs->in.bo_list_handle);
> Don't need it after you get bo_list without idr.
>
> Otherwise it looks really good to me, Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
>
>>    	return r;
>>    }
>>    
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 06aede1..529500c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -69,9 +69,10 @@
>>     * - 3.24.0 - Add high priority compute support for gfx9
>>     * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>>     * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>>     */
>>    #define KMS_DRIVER_MAJOR	3
>> -#define KMS_DRIVER_MINOR	26
>> +#define KMS_DRIVER_MINOR	27
>>    #define KMS_DRIVER_PATCHLEVEL	0
>>    
>>    int amdgpu_vram_limit = 0;
>> diff --git a/include/uapi/drm/amdgpu_drm.h
>> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>>    #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
>> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>>    
>>    struct drm_amdgpu_cs_chunk {
>>    	__u32		chunk_id;
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found]             ` <a43a5f3d-7fef-1721-307e-9135cf494264-5C7GfCeVMHo@public.gmane.org>
@ 2018-07-12  4:21               ` zhoucm1
       [not found]                 ` <f6b77415-b991-6d4e-d872-934a269c397e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: zhoucm1 @ 2018-07-12  4:21 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Olsak, Marek, Koenig, Christian

Thinking further about your performance rationale: can we go one step more and avoid creating the temporary BO list entirely, by adding the BOs
directly to the parser->validated list?

In fact, if the BO array is very long, the overhead of BO list creation in
CS is considerable: compared to the original path, it iterates over all BOs twice.

 From the UMD perspective, userspace does not create a BO list for every CS; it
can reuse a previously created bo_list across several subsequent CS calls, and
simply add any new BO to it as needed.

Thanks,
David Zhou
On 2018年07月12日 12:02, zhoucm1 wrote:
>
>
> On 2018年07月12日 11:09, Zhou, David(ChunMing) wrote:
>> Hi Andrey,
>>
>> Could you add compatibility flag or increase kms driver version? So 
>> that user space can keep old path when using new one.
> Sorry for noise, it's already at end of path.
>
> Regards,
> David Zhou
>>
>> Regards,
>> David Zhou
>>
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On 
>> Behalf Of zhoucm1
>> Sent: Thursday, July 12, 2018 10:31 AM
>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; 
>> amd-gfx@lists.freedesktop.org
>> Cc: Olsak, Marek <Marek.Olsak@amd.com>; Koenig, Christian 
>> <Christian.Koenig@amd.com>
>> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS 
>> ioctl v2
>>
>>
>>
>> On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
>>> This change is to support MESA performace optimization.
>>> Modify CS IOCTL to allow its input as command buffer and an array of
>>> buffer handles to create a temporay bo list and then destroy it when
>>> IOCTL completes.
>>> This saves on calling for BO_LIST create and destry IOCTLs in MESA and
>>> by this improves performance.
>>>
>>> v2: Avoid inserting the temp list into idr struct.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 
>>> ++++++++++++++++++-----------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>>>    include/uapi/drm/amdgpu_drm.h               |  1 +
>>>    5 files changed, 114 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 8eaba0f..9b472b2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct 
>>> amdgpu_bo_list *list,
>>>                     struct list_head *validated);
>>>    void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>>    void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in 
>>> *in,
>>> +                      struct drm_amdgpu_bo_list_entry **info_param);
>>> +
>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>> +                 struct drm_file *filp,
>>> +                 struct drm_amdgpu_bo_list_entry *info,
>>> +                 unsigned num_entries,
>>> +                 int *id,
>>> +                 struct amdgpu_bo_list **list);
>>> +
>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>>>       /*
>>>     * GFX stuff
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> index 92be7f6..14c7c59 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct 
>>> kref *ref)
>>>        kfree_rcu(list, rhead);
>>>    }
>>>    -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>                     struct drm_file *filp,
>>>                     struct drm_amdgpu_bo_list_entry *info,
>>>                     unsigned num_entries,
>>> -                 int *id)
>>> +                 int *id,
>>> +                 struct amdgpu_bo_list **list_out)
>>>    {
>>>        int r;
>>>        struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 
>>> +79,25 @@
>>> static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>            return r;
>>>        }
>>>    +    if (id) {
>>>        /* idr alloc should be called only after initialization of bo 
>>> list. */
>>> -    mutex_lock(&fpriv->bo_list_lock);
>>> -    r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>>> -    mutex_unlock(&fpriv->bo_list_lock);
>>> -    if (r < 0) {
>>> -        amdgpu_bo_list_free(list);
>>> -        return r;
>>> +        mutex_lock(&fpriv->bo_list_lock);
>>> +        r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, 
>>> GFP_KERNEL);
>>> +        mutex_unlock(&fpriv->bo_list_lock);
>>> +        if (r < 0) {
>>> +            amdgpu_bo_list_free(list);
>>> +            return r;
>>> +        }
>>> +        *id = r;
>>>        }
>>> -    *id = r;
>>> +
>>> +    if (list_out)
>>> +        *list_out = list;
>>>           return 0;
>>>    }
>>>    -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int
>>> id)
>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>>    {
>>>        struct amdgpu_bo_list *list;
>>>    @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct 
>>> amdgpu_bo_list *list)
>>>        kfree(list);
>>>    }
>>>    -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>> -                struct drm_file *filp)
>>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in 
>>> *in,
>>> +                      struct drm_amdgpu_bo_list_entry **info_param)
>>>    {
>>> -    const uint32_t info_size = sizeof(struct 
>>> drm_amdgpu_bo_list_entry);
>>> -
>>> -    struct amdgpu_device *adev = dev->dev_private;
>>> -    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> -    union drm_amdgpu_bo_list *args = data;
>>> -    uint32_t handle = args->in.list_handle;
>>> -    const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>>> -
>>>        struct drm_amdgpu_bo_list_entry *info;
>>> -    struct amdgpu_bo_list *list;
>>> -
>>>        int r;
>>> +    const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>>> +    const uint32_t info_size = sizeof(struct 
>>> drm_amdgpu_bo_list_entry);
>>>    -    info = kvmalloc_array(args->in.bo_number,
>>> +    info = kvmalloc_array(in->bo_number,
>>>                     sizeof(struct drm_amdgpu_bo_list_entry), 
>>> GFP_KERNEL);
>>>        if (!info)
>>>            return -ENOMEM;
>>>           /* copy the handle array from userspace to a kernel buffer */
>>>        r = -EFAULT;
>>> -    if (likely(info_size == args->in.bo_info_size)) {
>>> -        unsigned long bytes = args->in.bo_number *
>>> -            args->in.bo_info_size;
>>> +    if (likely(info_size == in->bo_info_size)) {
>>> +        unsigned long bytes = in->bo_number *
>>> +            in->bo_info_size;
>>>               if (copy_from_user(info, uptr, bytes))
>>>                goto error_free;
>>>           } else {
>>> -        unsigned long bytes = min(args->in.bo_info_size, info_size);
>>> +        unsigned long bytes = min(in->bo_info_size, info_size);
>>>            unsigned i;
>>>    -        memset(info, 0, args->in.bo_number * info_size);
>>> -        for (i = 0; i < args->in.bo_number; ++i) {
>>> +        memset(info, 0, in->bo_number * info_size);
>>> +        for (i = 0; i < in->bo_number; ++i) {
>>>                if (copy_from_user(&info[i], uptr, bytes))
>>>                    goto error_free;
>>>    -            uptr += args->in.bo_info_size;
>>> +            uptr += in->bo_info_size;
>>>            }
>>>        }
>>>    +    *info_param = info;
>>> +    return 0;
>>> +
>>> +error_free:
>>> +    kvfree(info);
>>> +    return r;
>>> +}
>>> +
>>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>> +                struct drm_file *filp)
>>> +{
>>> +    struct amdgpu_device *adev = dev->dev_private;
>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>> +    union drm_amdgpu_bo_list *args = data;
>>> +    uint32_t handle = args->in.list_handle;
>>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>>> +    struct amdgpu_bo_list *list;
>>> +    int r;
>>> +
>>> +    r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>>> +    if (r)
>>> +        goto error_free;
>>> +
>>>        switch (args->in.operation) {
>>>        case AMDGPU_BO_LIST_OP_CREATE:
>>>            r = amdgpu_bo_list_create(adev, filp, info, 
>>> args->in.bo_number,
>>> -                      &handle);
>>> +                      &handle, NULL);
>>>            if (r)
>>>                goto error_free;
>>>            break;
>>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, 
>>> void *data,
>>>        return 0;
>>>       error_free:
>>> -    kvfree(info);
>>> +    if (info)
>>> +        kvfree(info);
>>>        return r;
>>>    }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 9881a1e..30026b8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct 
>>> amdgpu_cs_parser *p,
>>>        return 0;
>>>    }
>>>    -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void
>>> *data)
>>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>>> +                      struct drm_amdgpu_bo_list_in *data) {
>>> +    int r;
>>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>>> +
>>> +    r = amdgpu_bo_create_list_entry_array(data, &info);
>>> +    if (r)
>>> +        return r;
>>> +
>>> +    r = amdgpu_bo_list_create(p->adev, p->filp, info, 
>>> data->bo_number, NULL, &p->bo_list);
>>> +    if (r)
>>> +        goto error_free;
>>> +
>>> +    kvfree(info);
>>> +    return 0;
>>> +
>>> +error_free:
>>> +    if (info)
>>> +        kvfree(info);
>>> +
>>> +    return r;
>>> +}
>>> +
>>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
>>> +drm_amdgpu_cs *cs)
>>>    {
>>>        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>> -    union drm_amdgpu_cs *cs = data;
>>>        uint64_t *chunk_array_user;
>>>        uint64_t *chunk_array;
>>>        unsigned size, num_ibs = 0;
>>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct
>>> amdgpu_cs_parser *p, void *data)
>>>                   break;
>>>    +        case AMDGPU_CHUNK_ID_BO_HANDLES:
>>> +            size = sizeof(struct drm_amdgpu_bo_list_in);
>>> +            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>>> +                ret = -EINVAL;
>>> +                goto free_partial_kdata;
>>> +            }
>>> +
>>> +            ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
>>> +            if (ret)
>>> +                goto free_partial_kdata;
>>> +
>>> +            break;
>>> +
>>>            case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>>            case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>>            case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct
>>> amdgpu_cs_parser *p,
>>>           INIT_LIST_HEAD(&p->validated);
>>>    -    p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>>> +    /* p->bo_list could already be assigned if 
>>> AMDGPU_CHUNK_ID_BO_HANDLES is present */
>>> +    if (!p->bo_list)
>>> +        p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>>> +    else
>>> +        mutex_lock(&p->bo_list->lock);
>>> +
>>>        if (p->bo_list) {
>>>            amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>>>            if (p->bo_list->first_userptr != p->bo_list->num_entries) @@
>>> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct 
>>> amdgpu_cs_parser *p)
>>>     * used by parsing context.
>>>     **/
>>>    static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
>>> *parser, int error,
>>> -                  bool backoff)
>>> +                  bool backoff, int id)
>> Don't need it after you get bo_list without idr.
>>
>>>    {
>>>        unsigned i;
>>>    @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>>        r = amdgpu_cs_submit(&parser, cs);
>>>       out:
>>> -    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>> +    amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
>>> +cs->in.bo_list_handle);
>> Don't need it after you get bo_list without idr.
>>
>> Otherwise it looks really good to me, Reviewed-by: Chunming Zhou 
>> <david1.zhou@amd.com>
>>
>>>        return r;
>>>    }
>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index 06aede1..529500c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -69,9 +69,10 @@
>>>     * - 3.24.0 - Add high priority compute support for gfx9
>>>     * - 3.25.0 - Add support for sensor query info (stable pstate 
>>> sclk/mclk).
>>>     * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST 
>>> creation.
>>>     */
>>>    #define KMS_DRIVER_MAJOR    3
>>> -#define KMS_DRIVER_MINOR    26
>>> +#define KMS_DRIVER_MINOR    27
>>>    #define KMS_DRIVER_PATCHLEVEL    0
>>>       int amdgpu_vram_limit = 0;
>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644
>>> --- a/include/uapi/drm/amdgpu_drm.h
>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>>>    #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
>>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
>>> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>>>       struct drm_amdgpu_cs_chunk {
>>>        __u32        chunk_id;
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found] ` <1531342678-11142-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2018-07-12  2:31   ` zhoucm1
@ 2018-07-12  7:09   ` Christian König
  1 sibling, 0 replies; 8+ messages in thread
From: Christian König @ 2018-07-12  7:09 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Marek.Olsak-5C7GfCeVMHo

Am 11.07.2018 um 22:57 schrieb Andrey Grodzovsky:
> This change is to support MESA performace optimization.
> Modify CS IOCTL to allow its input as command buffer and an array of
> buffer handles to create a temporay bo list and then destroy it
> when IOCTL completes.
> This saves on calling for BO_LIST create and destry IOCTLs in MESA
> and by this improves performance.
>
> v2: Avoid inserting the temp list into idr struct.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 ++++++++++++++++++-----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>   include/uapi/drm/amdgpu_drm.h               |  1 +
>   5 files changed, 114 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 8eaba0f..9b472b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>   			     struct list_head *validated);
>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param);
> +
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +				 struct drm_file *filp,
> +				 struct drm_amdgpu_bo_list_entry *info,
> +				 unsigned num_entries,
> +				 int *id,
> +				 struct amdgpu_bo_list **list);
> +
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>   
>   /*
>    * GFX stuff
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 92be7f6..14c7c59 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct kref *ref)
>   	kfree_rcu(list, rhead);
>   }
>   
> -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   				 struct drm_file *filp,
>   				 struct drm_amdgpu_bo_list_entry *info,
>   				 unsigned num_entries,
> -				 int *id)
> +				 int *id,
> +				 struct amdgpu_bo_list **list_out)
>   {
>   	int r;
>   	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> @@ -78,20 +79,25 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>   		return r;
>   	}
>   
> +	if (id) {
>   	/* idr alloc should be called only after initialization of bo list. */
> -	mutex_lock(&fpriv->bo_list_lock);
> -	r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
> -	mutex_unlock(&fpriv->bo_list_lock);
> -	if (r < 0) {
> -		amdgpu_bo_list_free(list);
> -		return r;
> +		mutex_lock(&fpriv->bo_list_lock);
> +		r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
> +		mutex_unlock(&fpriv->bo_list_lock);
> +		if (r < 0) {
> +			amdgpu_bo_list_free(list);
> +			return r;
> +		}
> +		*id = r;

Can we move that into the caller or into a separate function?

Would be cleaner to separate the functionality of creating a bo_list 
structure from inserting it into the IDR.

>   	}
> -	*id = r;
> +
> +	if (list_out)
> +		*list_out = list;
>   
>   	return 0;
>   }
>   
> -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>   {
>   	struct amdgpu_bo_list *list;
>   
> @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>   	kfree(list);
>   }
>   
> -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> -				struct drm_file *filp)
> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
> +				      struct drm_amdgpu_bo_list_entry **info_param)
>   {
> -	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
> -
> -	struct amdgpu_device *adev = dev->dev_private;
> -	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> -	union drm_amdgpu_bo_list *args = data;
> -	uint32_t handle = args->in.list_handle;
> -	const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
> -
>   	struct drm_amdgpu_bo_list_entry *info;
> -	struct amdgpu_bo_list *list;
> -
>   	int r;
> +	const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
> +	const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);

Reverse xmas tree order please, e.g. at least "int r;" should be last. I 
personally also put consts at the beginning of the declarations.

The same problem appears in a few other places.

>   
> -	info = kvmalloc_array(args->in.bo_number,
> +	info = kvmalloc_array(in->bo_number,
>   			     sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);

We could use info_size here, would make the line a bit shorter.

>   	if (!info)
>   		return -ENOMEM;
>   
>   	/* copy the handle array from userspace to a kernel buffer */
>   	r = -EFAULT;
> -	if (likely(info_size == args->in.bo_info_size)) {
> -		unsigned long bytes = args->in.bo_number *
> -			args->in.bo_info_size;
> +	if (likely(info_size == in->bo_info_size)) {
> +		unsigned long bytes = in->bo_number *
> +			in->bo_info_size;
>   
>   		if (copy_from_user(info, uptr, bytes))
>   			goto error_free;
>   
>   	} else {
> -		unsigned long bytes = min(args->in.bo_info_size, info_size);
> +		unsigned long bytes = min(in->bo_info_size, info_size);
>   		unsigned i;
>   
> -		memset(info, 0, args->in.bo_number * info_size);
> -		for (i = 0; i < args->in.bo_number; ++i) {
> +		memset(info, 0, in->bo_number * info_size);
> +		for (i = 0; i < in->bo_number; ++i) {
>   			if (copy_from_user(&info[i], uptr, bytes))
>   				goto error_free;
>   
> -			uptr += args->in.bo_info_size;
> +			uptr += in->bo_info_size;
>   		}
>   	}
>   
> +	*info_param = info;
> +	return 0;
> +
> +error_free:
> +	kvfree(info);
> +	return r;
> +}
> +
> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
> +				struct drm_file *filp)
> +{
> +	struct amdgpu_device *adev = dev->dev_private;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	union drm_amdgpu_bo_list *args = data;
> +	uint32_t handle = args->in.list_handle;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +	struct amdgpu_bo_list *list;
> +	int r;
> +
> +	r = amdgpu_bo_create_list_entry_array(&args->in, &info);
> +	if (r)
> +		goto error_free;
> +
>   	switch (args->in.operation) {
>   	case AMDGPU_BO_LIST_OP_CREATE:
>   		r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
> -					  &handle);
> +					  &handle, NULL);
>   		if (r)
>   			goto error_free;
>   		break;
> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>   	return 0;
>   
>   error_free:
> -	kvfree(info);
> +	if (info)
> +		kvfree(info);
>   	return r;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9881a1e..30026b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>   	return 0;
>   }
>   
> -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
> +				      struct drm_amdgpu_bo_list_in *data)
> +{
> +	int r;
> +	struct drm_amdgpu_bo_list_entry *info = NULL;
> +
> +	r = amdgpu_bo_create_list_entry_array(data, &info);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number, NULL, &p->bo_list);
> +	if (r)
> +		goto error_free;
> +
> +	kvfree(info);
> +	return 0;
> +
> +error_free:
> +	if (info)
> +		kvfree(info);
> +
> +	return r;
> +}
> +
> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
>   {
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
> -	union drm_amdgpu_cs *cs = data;
>   	uint64_t *chunk_array_user;
>   	uint64_t *chunk_array;
>   	unsigned size, num_ibs = 0;
> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   
>   			break;
>   
> +		case AMDGPU_CHUNK_ID_BO_HANDLES:
> +			size = sizeof(struct drm_amdgpu_bo_list_in);
> +			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
> +				ret = -EINVAL;
> +				goto free_partial_kdata;
> +			}
> +
> +			ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
> +			if (ret)
> +				goto free_partial_kdata;
> +
> +			break;
> +
>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>   		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   
>   	INIT_LIST_HEAD(&p->validated);
>   
> -	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
> +	/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
> +	if (!p->bo_list)
> +		p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
> +	else
> +		mutex_lock(&p->bo_list->lock);
> +
>   	if (p->bo_list) {
>   		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>   		if (p->bo_list->first_userptr != p->bo_list->num_entries)
> @@ -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>    * used by parsing context.
>    **/
>   static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> -				  bool backoff)
> +				  bool backoff, int id)
>   {
>   	unsigned i;
>   
> @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	r = amdgpu_cs_submit(&parser, cs);
>   
>   out:
> -	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +	amdgpu_cs_parser_fini(&parser, r, reserved_buffers, cs->in.bo_list_handle);

As David noted as well those changes are unnecessary now.

Regards,
Christian.

>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 06aede1..529500c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -69,9 +69,10 @@
>    * - 3.24.0 - Add high priority compute support for gfx9
>    * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
>    * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
>    */
>   #define KMS_DRIVER_MAJOR	3
> -#define KMS_DRIVER_MINOR	26
> +#define KMS_DRIVER_MINOR	27
>   #define KMS_DRIVER_PATCHLEVEL	0
>   
>   int amdgpu_vram_limit = 0;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 09741ba..94444ee 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>   #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>   
>   struct drm_amdgpu_cs_chunk {
>   	__u32		chunk_id;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found]                 ` <f6b77415-b991-6d4e-d872-934a269c397e-5C7GfCeVMHo@public.gmane.org>
@ 2018-07-12  7:56                   ` Christian König
       [not found]                     ` <18352936-c661-dccd-a515-6d2845172470-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 8+ messages in thread
From: Christian König @ 2018-07-12  7:56 UTC (permalink / raw)
  To: zhoucm1, Zhou, David(ChunMing),
	Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Olsak, Marek, Koenig, Christian

Am 12.07.2018 um 06:21 schrieb zhoucm1:
> With more thinking for you performance reason, Can we go further more 
> not to create temp bo list at all? directly add them into 
> parser->validated list?

You still need something which is added to the parser->validated list, 
so creating the array of BOs is unavoidable.

> In fact, if bo array is very long, then overhead of bo list creation 
> in CS is considerable, which will double iterate all BOs compared to 
> original.
>
> From UMD perspective, they don't create bo list for every CS, they 
> could use old created bo_list for next several CS, if there is a new 
> bo, they just add it.

And exactly that is the failed concept of bo_lists, it is complete 
nonsense to do this.

Either you create the list of BOs from scratch for each command 
submission, like Mesa does — which is exactly the case we try to 
support efficiently here.

Or you use per process BOs which are always valid. Something which we 
have already implemented as well.

Regards,
Christian.

>
> Thanks,
> David Zhou
> On 2018年07月12日 12:02, zhoucm1 wrote:
>>
>>
>> On 2018年07月12日 11:09, Zhou, David(ChunMing) wrote:
>>> Hi Andrey,
>>>
>>> Could you add compatibility flag or increase kms driver version? So 
>>> that user space can keep old path when using new one.
>> Sorry for noise, it's already at end of path.
>>
>> Regards,
>> David Zhou
>>>
>>> Regards,
>>> David Zhou
>>>
>>> -----Original Message-----
>>> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On 
>>> Behalf Of zhoucm1
>>> Sent: Thursday, July 12, 2018 10:31 AM
>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; 
>>> amd-gfx@lists.freedesktop.org
>>> Cc: Olsak, Marek <Marek.Olsak@amd.com>; Koenig, Christian 
>>> <Christian.Koenig@amd.com>
>>> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS 
>>> ioctl v2
>>>
>>>
>>>
>>> On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
>>>> This change is to support MESA performace optimization.
>>>> Modify CS IOCTL to allow its input as command buffer and an array of
>>>> buffer handles to create a temporay bo list and then destroy it when
>>>> IOCTL completes.
>>>> This saves on calling for BO_LIST create and destry IOCTLs in MESA and
>>>> by this improves performance.
>>>>
>>>> v2: Avoid inserting the temp list into idr struct.
>>>>
>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 
>>>> ++++++++++++++++++-----------
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>>>>    include/uapi/drm/amdgpu_drm.h               |  1 +
>>>>    5 files changed, 114 insertions(+), 38 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 8eaba0f..9b472b2 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct 
>>>> amdgpu_bo_list *list,
>>>>                     struct list_head *validated);
>>>>    void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>>>    void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>>>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in 
>>>> *in,
>>>> +                      struct drm_amdgpu_bo_list_entry **info_param);
>>>> +
>>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>> +                 struct drm_file *filp,
>>>> +                 struct drm_amdgpu_bo_list_entry *info,
>>>> +                 unsigned num_entries,
>>>> +                 int *id,
>>>> +                 struct amdgpu_bo_list **list);
>>>> +
>>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>>>>       /*
>>>>     * GFX stuff
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> index 92be7f6..14c7c59 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct 
>>>> kref *ref)
>>>>        kfree_rcu(list, rhead);
>>>>    }
>>>>    -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>>                     struct drm_file *filp,
>>>>                     struct drm_amdgpu_bo_list_entry *info,
>>>>                     unsigned num_entries,
>>>> -                 int *id)
>>>> +                 int *id,
>>>> +                 struct amdgpu_bo_list **list_out)
>>>>    {
>>>>        int r;
>>>>        struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 
>>>> +79,25 @@
>>>> static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>>            return r;
>>>>        }
>>>>    +    if (id) {
>>>>        /* idr alloc should be called only after initialization of 
>>>> bo list. */
>>>> -    mutex_lock(&fpriv->bo_list_lock);
>>>> -    r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>>>> -    mutex_unlock(&fpriv->bo_list_lock);
>>>> -    if (r < 0) {
>>>> -        amdgpu_bo_list_free(list);
>>>> -        return r;
>>>> +        mutex_lock(&fpriv->bo_list_lock);
>>>> +        r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, 
>>>> GFP_KERNEL);
>>>> +        mutex_unlock(&fpriv->bo_list_lock);
>>>> +        if (r < 0) {
>>>> +            amdgpu_bo_list_free(list);
>>>> +            return r;
>>>> +        }
>>>> +        *id = r;
>>>>        }
>>>> -    *id = r;
>>>> +
>>>> +    if (list_out)
>>>> +        *list_out = list;
>>>>           return 0;
>>>>    }
>>>>    -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int
>>>> id)
>>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>>>    {
>>>>        struct amdgpu_bo_list *list;
>>>>    @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct 
>>>> amdgpu_bo_list *list)
>>>>        kfree(list);
>>>>    }
>>>>    -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>>> -                struct drm_file *filp)
>>>> +int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in 
>>>> *in,
>>>> +                      struct drm_amdgpu_bo_list_entry **info_param)
>>>>    {
>>>> -    const uint32_t info_size = sizeof(struct 
>>>> drm_amdgpu_bo_list_entry);
>>>> -
>>>> -    struct amdgpu_device *adev = dev->dev_private;
>>>> -    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> -    union drm_amdgpu_bo_list *args = data;
>>>> -    uint32_t handle = args->in.list_handle;
>>>> -    const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>>>> -
>>>>        struct drm_amdgpu_bo_list_entry *info;
>>>> -    struct amdgpu_bo_list *list;
>>>> -
>>>>        int r;
>>>> +    const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>>>> +    const uint32_t info_size = sizeof(struct 
>>>> drm_amdgpu_bo_list_entry);
>>>>    -    info = kvmalloc_array(args->in.bo_number,
>>>> +    info = kvmalloc_array(in->bo_number,
>>>>                     sizeof(struct drm_amdgpu_bo_list_entry), 
>>>> GFP_KERNEL);
>>>>        if (!info)
>>>>            return -ENOMEM;
>>>>           /* copy the handle array from userspace to a kernel 
>>>> buffer */
>>>>        r = -EFAULT;
>>>> -    if (likely(info_size == args->in.bo_info_size)) {
>>>> -        unsigned long bytes = args->in.bo_number *
>>>> -            args->in.bo_info_size;
>>>> +    if (likely(info_size == in->bo_info_size)) {
>>>> +        unsigned long bytes = in->bo_number *
>>>> +            in->bo_info_size;
>>>>               if (copy_from_user(info, uptr, bytes))
>>>>                goto error_free;
>>>>           } else {
>>>> -        unsigned long bytes = min(args->in.bo_info_size, info_size);
>>>> +        unsigned long bytes = min(in->bo_info_size, info_size);
>>>>            unsigned i;
>>>>    -        memset(info, 0, args->in.bo_number * info_size);
>>>> -        for (i = 0; i < args->in.bo_number; ++i) {
>>>> +        memset(info, 0, in->bo_number * info_size);
>>>> +        for (i = 0; i < in->bo_number; ++i) {
>>>>                if (copy_from_user(&info[i], uptr, bytes))
>>>>                    goto error_free;
>>>>    -            uptr += args->in.bo_info_size;
>>>> +            uptr += in->bo_info_size;
>>>>            }
>>>>        }
>>>>    +    *info_param = info;
>>>> +    return 0;
>>>> +
>>>> +error_free:
>>>> +    kvfree(info);
>>>> +    return r;
>>>> +}
>>>> +
>>>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>>> +                struct drm_file *filp)
>>>> +{
>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> +    union drm_amdgpu_bo_list *args = data;
>>>> +    uint32_t handle = args->in.list_handle;
>>>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>>>> +    struct amdgpu_bo_list *list;
>>>> +    int r;
>>>> +
>>>> +    r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>>>> +    if (r)
>>>> +        goto error_free;
>>>> +
>>>>        switch (args->in.operation) {
>>>>        case AMDGPU_BO_LIST_OP_CREATE:
>>>>            r = amdgpu_bo_list_create(adev, filp, info, 
>>>> args->in.bo_number,
>>>> -                      &handle);
>>>> +                      &handle, NULL);
>>>>            if (r)
>>>>                goto error_free;
>>>>            break;
>>>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device 
>>>> *dev, void *data,
>>>>        return 0;
>>>>       error_free:
>>>> -    kvfree(info);
>>>> +    if (info)
>>>> +        kvfree(info);
>>>>        return r;
>>>>    }
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index 9881a1e..30026b8 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct 
>>>> amdgpu_cs_parser *p,
>>>>        return 0;
>>>>    }
>>>>    -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void
>>>> *data)
>>>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>>>> +                      struct drm_amdgpu_bo_list_in *data) {
>>>> +    int r;
>>>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>>>> +
>>>> +    r = amdgpu_bo_create_list_entry_array(data, &info);
>>>> +    if (r)
>>>> +        return r;
>>>> +
>>>> +    r = amdgpu_bo_list_create(p->adev, p->filp, info, 
>>>> data->bo_number, NULL, &p->bo_list);
>>>> +    if (r)
>>>> +        goto error_free;
>>>> +
>>>> +    kvfree(info);
>>>> +    return 0;
>>>> +
>>>> +error_free:
>>>> +    if (info)
>>>> +        kvfree(info);
>>>> +
>>>> +    return r;
>>>> +}
>>>> +
>>>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
>>>> +drm_amdgpu_cs *cs)
>>>>    {
>>>>        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>> -    union drm_amdgpu_cs *cs = data;
>>>>        uint64_t *chunk_array_user;
>>>>        uint64_t *chunk_array;
>>>>        unsigned size, num_ibs = 0;
>>>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct
>>>> amdgpu_cs_parser *p, void *data)
>>>>                   break;
>>>>    +        case AMDGPU_CHUNK_ID_BO_HANDLES:
>>>> +            size = sizeof(struct drm_amdgpu_bo_list_in);
>>>> +            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>>>> +                ret = -EINVAL;
>>>> +                goto free_partial_kdata;
>>>> +            }
>>>> +
>>>> +            ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
>>>> +            if (ret)
>>>> +                goto free_partial_kdata;
>>>> +
>>>> +            break;
>>>> +
>>>>            case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>>>            case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>>>            case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>>>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct
>>>> amdgpu_cs_parser *p,
>>>>           INIT_LIST_HEAD(&p->validated);
>>>>    -    p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
>>>> +    /* p->bo_list could already be assigned if 
>>>> AMDGPU_CHUNK_ID_BO_HANDLES is present */
>>>> +    if (!p->bo_list)
>>>> +        p->bo_list = amdgpu_bo_list_get(fpriv, 
>>>> cs->in.bo_list_handle);
>>>> +    else
>>>> +        mutex_lock(&p->bo_list->lock);
>>>> +
>>>>        if (p->bo_list) {
>>>>            amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>>>>            if (p->bo_list->first_userptr != 
>>>> p->bo_list->num_entries) @@
>>>> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct 
>>>> amdgpu_cs_parser *p)
>>>>     * used by parsing context.
>>>>     **/
>>>>    static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
>>>> *parser, int error,
>>>> -                  bool backoff)
>>>> +                  bool backoff, int id)
>>> Don't need it after you get bo_list without idr.
>>>
>>>>    {
>>>>        unsigned i;
>>>>    @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device 
>>>> *dev, void *data, struct drm_file *filp)
>>>>        r = amdgpu_cs_submit(&parser, cs);
>>>>       out:
>>>> -    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>>> +    amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
>>>> +cs->in.bo_list_handle);
>>> Don't need it after you get bo_list without idr.
>>>
>>> Otherwise it looks really good to me, Reviewed-by: Chunming Zhou 
>>> <david1.zhou@amd.com>
>>>
>>>>        return r;
>>>>    }
>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> index 06aede1..529500c 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> @@ -69,9 +69,10 @@
>>>>     * - 3.24.0 - Add high priority compute support for gfx9
>>>>     * - 3.25.0 - Add support for sensor query info (stable pstate 
>>>> sclk/mclk).
>>>>     * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>>>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST 
>>>> creation.
>>>>     */
>>>>    #define KMS_DRIVER_MAJOR    3
>>>> -#define KMS_DRIVER_MINOR    26
>>>> +#define KMS_DRIVER_MINOR    27
>>>>    #define KMS_DRIVER_PATCHLEVEL    0
>>>>       int amdgpu_vram_limit = 0;
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>>>>    #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
>>>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>>>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
>>>> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>>>>       struct drm_amdgpu_cs_chunk {
>>>>        __u32        chunk_id;
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2
       [not found]                     ` <18352936-c661-dccd-a515-6d2845172470-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-07-12  8:14                       ` zhoucm1
  0 siblings, 0 replies; 8+ messages in thread
From: zhoucm1 @ 2018-07-12  8:14 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, Zhou, David(ChunMing),
	Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Olsak, Marek, Guo, Kai



On 2018年07月12日 15:56, Christian König wrote:
> Am 12.07.2018 um 06:21 schrieb zhoucm1:
>> With more thinking for you performance reason, Can we go further more 
>> not to create temp bo list at all? directly add them into 
>> parser->validated list?
>
> You still need something which is added to the parser->validated list, 
> so creating the array of BOs in unavoidable.
>
>> In fact, if bo array is very long, then overhead of bo list creation 
>> in CS is considerable, which will double iterate all BOs compared to 
>> original.
>>
>> From UMD perspective, they don't create bo list for every CS, they 
>> could use old created bo_list for next several CS, if there is a new 
>> bo, they just add it.
>
> And exactly that is the failed concept of bo_lists, it is complete 
> nonsense to do this.
>
> Either you create the list of BOs from scratch for each command 
> submission like Mesa does it in which is exactly the case we try to 
> support efficient here.
@Kai, do you have comments for what Christian said?

>
> Or you use per process BOs which are always valid. Something which we 
> have already implemented as well.
Yes, Vulkan already uses it since 4.15. But pro-OGL still uses the bo list.
>
> Regards,
> Christian.
>
>>
>> Thanks,
>> David Zhou
>> On 2018年07月12日 12:02, zhoucm1 wrote:
>>>
>>>
>>> On 2018年07月12日 11:09, Zhou, David(ChunMing) wrote:
>>>> Hi Andrey,
>>>>
>>>> Could you add compatibility flag or increase kms driver version? So 
>>>> that user space can keep old path when using new one.
>>> Sorry for noise, it's already at end of path.
>>>
>>> Regards,
>>> David Zhou
>>>>
>>>> Regards,
>>>> David Zhou
>>>>
>>>> -----Original Message-----
>>>> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On 
>>>> Behalf Of zhoucm1
>>>> Sent: Thursday, July 12, 2018 10:31 AM
>>>> To: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; 
>>>> amd-gfx@lists.freedesktop.org
>>>> Cc: Olsak, Marek <Marek.Olsak@amd.com>; Koenig, Christian 
>>>> <Christian.Koenig@amd.com>
>>>> Subject: Re: [PATCH v2] drm/amdgpu: Allow to create BO lists in CS 
>>>> ioctl v2
>>>>
>>>>
>>>>
>>>> On 2018年07月12日 04:57, Andrey Grodzovsky wrote:
>>>>> This change is to support MESA performace optimization.
>>>>> Modify CS IOCTL to allow its input as command buffer and an array of
>>>>> buffer handles to create a temporay bo list and then destroy it when
>>>>> IOCTL completes.
>>>>> This saves on calling for BO_LIST create and destry IOCTLs in MESA 
>>>>> and
>>>>> by this improves performance.
>>>>>
>>>>> v2: Avoid inserting the temp list into idr struct.
>>>>>
>>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h         | 11 ++++
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 86 
>>>>> ++++++++++++++++++-----------
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 +++++++++++++++--
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  3 +-
>>>>>    include/uapi/drm/amdgpu_drm.h               |  1 +
>>>>>    5 files changed, 114 insertions(+), 38 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> index 8eaba0f..9b472b2 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>>> @@ -732,6 +732,17 @@ void amdgpu_bo_list_get_list(struct 
>>>>> amdgpu_bo_list *list,
>>>>>                     struct list_head *validated);
>>>>>    void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
>>>>>    void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
>>>>> +int amdgpu_bo_create_list_entry_array(struct 
>>>>> drm_amdgpu_bo_list_in *in,
>>>>> +                      struct drm_amdgpu_bo_list_entry **info_param);
>>>>> +
>>>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>>> +                 struct drm_file *filp,
>>>>> +                 struct drm_amdgpu_bo_list_entry *info,
>>>>> +                 unsigned num_entries,
>>>>> +                 int *id,
>>>>> +                 struct amdgpu_bo_list **list);
>>>>> +
>>>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id);
>>>>>       /*
>>>>>     * GFX stuff
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>>> index 92be7f6..14c7c59 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>>>>> @@ -55,11 +55,12 @@ static void amdgpu_bo_list_release_rcu(struct 
>>>>> kref *ref)
>>>>>        kfree_rcu(list, rhead);
>>>>>    }
>>>>>    -static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>>> +int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>>>                     struct drm_file *filp,
>>>>>                     struct drm_amdgpu_bo_list_entry *info,
>>>>>                     unsigned num_entries,
>>>>> -                 int *id)
>>>>> +                 int *id,
>>>>> +                 struct amdgpu_bo_list **list_out)
>>>>>    {
>>>>>        int r;
>>>>>        struct amdgpu_fpriv *fpriv = filp->driver_priv; @@ -78,20 
>>>>> +79,25 @@
>>>>> static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>>>>            return r;
>>>>>        }
>>>>>    +    if (id) {
>>>>>        /* idr alloc should be called only after initialization of 
>>>>> bo list. */
>>>>> -    mutex_lock(&fpriv->bo_list_lock);
>>>>> -    r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
>>>>> -    mutex_unlock(&fpriv->bo_list_lock);
>>>>> -    if (r < 0) {
>>>>> -        amdgpu_bo_list_free(list);
>>>>> -        return r;
>>>>> +        mutex_lock(&fpriv->bo_list_lock);
>>>>> +        r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, 
>>>>> GFP_KERNEL);
>>>>> +        mutex_unlock(&fpriv->bo_list_lock);
>>>>> +        if (r < 0) {
>>>>> +            amdgpu_bo_list_free(list);
>>>>> +            return r;
>>>>> +        }
>>>>> +        *id = r;
>>>>>        }
>>>>> -    *id = r;
>>>>> +
>>>>> +    if (list_out)
>>>>> +        *list_out = list;
>>>>>           return 0;
>>>>>    }
>>>>>    -static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, 
>>>>> int
>>>>> id)
>>>>> +void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>>>>    {
>>>>>        struct amdgpu_bo_list *list;
>>>>>    @@ -263,53 +269,68 @@ void amdgpu_bo_list_free(struct 
>>>>> amdgpu_bo_list *list)
>>>>>        kfree(list);
>>>>>    }
>>>>>    -int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>>>> -                struct drm_file *filp)
>>>>> +int amdgpu_bo_create_list_entry_array(struct 
>>>>> drm_amdgpu_bo_list_in *in,
>>>>> +                      struct drm_amdgpu_bo_list_entry **info_param)
>>>>>    {
>>>>> -    const uint32_t info_size = sizeof(struct 
>>>>> drm_amdgpu_bo_list_entry);
>>>>> -
>>>>> -    struct amdgpu_device *adev = dev->dev_private;
>>>>> -    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>> -    union drm_amdgpu_bo_list *args = data;
>>>>> -    uint32_t handle = args->in.list_handle;
>>>>> -    const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
>>>>> -
>>>>>        struct drm_amdgpu_bo_list_entry *info;
>>>>> -    struct amdgpu_bo_list *list;
>>>>> -
>>>>>        int r;
>>>>> +    const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
>>>>> +    const uint32_t info_size = sizeof(struct 
>>>>> drm_amdgpu_bo_list_entry);
>>>>>    -    info = kvmalloc_array(args->in.bo_number,
>>>>> +    info = kvmalloc_array(in->bo_number,
>>>>>                     sizeof(struct drm_amdgpu_bo_list_entry), 
>>>>> GFP_KERNEL);
>>>>>        if (!info)
>>>>>            return -ENOMEM;
>>>>>           /* copy the handle array from userspace to a kernel 
>>>>> buffer */
>>>>>        r = -EFAULT;
>>>>> -    if (likely(info_size == args->in.bo_info_size)) {
>>>>> -        unsigned long bytes = args->in.bo_number *
>>>>> -            args->in.bo_info_size;
>>>>> +    if (likely(info_size == in->bo_info_size)) {
>>>>> +        unsigned long bytes = in->bo_number *
>>>>> +            in->bo_info_size;
>>>>>               if (copy_from_user(info, uptr, bytes))
>>>>>                goto error_free;
>>>>>           } else {
>>>>> -        unsigned long bytes = min(args->in.bo_info_size, info_size);
>>>>> +        unsigned long bytes = min(in->bo_info_size, info_size);
>>>>>            unsigned i;
>>>>>    -        memset(info, 0, args->in.bo_number * info_size);
>>>>> -        for (i = 0; i < args->in.bo_number; ++i) {
>>>>> +        memset(info, 0, in->bo_number * info_size);
>>>>> +        for (i = 0; i < in->bo_number; ++i) {
>>>>>                if (copy_from_user(&info[i], uptr, bytes))
>>>>>                    goto error_free;
>>>>>    -            uptr += args->in.bo_info_size;
>>>>> +            uptr += in->bo_info_size;
>>>>>            }
>>>>>        }
>>>>>    +    *info_param = info;
>>>>> +    return 0;
>>>>> +
>>>>> +error_free:
>>>>> +    kvfree(info);
>>>>> +    return r;
>>>>> +}
>>>>> +
>>>>> +int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
>>>>> +                struct drm_file *filp)
>>>>> +{
>>>>> +    struct amdgpu_device *adev = dev->dev_private;
>>>>> +    struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>>> +    union drm_amdgpu_bo_list *args = data;
>>>>> +    uint32_t handle = args->in.list_handle;
>>>>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>>>>> +    struct amdgpu_bo_list *list;
>>>>> +    int r;
>>>>> +
>>>>> +    r = amdgpu_bo_create_list_entry_array(&args->in, &info);
>>>>> +    if (r)
>>>>> +        goto error_free;
>>>>> +
>>>>>        switch (args->in.operation) {
>>>>>        case AMDGPU_BO_LIST_OP_CREATE:
>>>>>            r = amdgpu_bo_list_create(adev, filp, info, 
>>>>> args->in.bo_number,
>>>>> -                      &handle);
>>>>> +                      &handle, NULL);
>>>>>            if (r)
>>>>>                goto error_free;
>>>>>            break;
>>>>> @@ -345,6 +366,7 @@ int amdgpu_bo_list_ioctl(struct drm_device 
>>>>> *dev, void *data,
>>>>>        return 0;
>>>>>       error_free:
>>>>> -    kvfree(info);
>>>>> +    if (info)
>>>>> +        kvfree(info);
>>>>>        return r;
>>>>>    }
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> index 9881a1e..30026b8 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>>> @@ -66,11 +66,34 @@ static int amdgpu_cs_user_fence_chunk(struct 
>>>>> amdgpu_cs_parser *p,
>>>>>        return 0;
>>>>>    }
>>>>>    -static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void
>>>>> *data)
>>>>> +static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
>>>>> +                      struct drm_amdgpu_bo_list_in *data) {
>>>>> +    int r;
>>>>> +    struct drm_amdgpu_bo_list_entry *info = NULL;
>>>>> +
>>>>> +    r = amdgpu_bo_create_list_entry_array(data, &info);
>>>>> +    if (r)
>>>>> +        return r;
>>>>> +
>>>>> +    r = amdgpu_bo_list_create(p->adev, p->filp, info, 
>>>>> data->bo_number, NULL, &p->bo_list);
>>>>> +    if (r)
>>>>> +        goto error_free;
>>>>> +
>>>>> +    kvfree(info);
>>>>> +    return 0;
>>>>> +
>>>>> +error_free:
>>>>> +    if (info)
>>>>> +        kvfree(info);
>>>>> +
>>>>> +    return r;
>>>>> +}
>>>>> +
>>>>> +static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union
>>>>> +drm_amdgpu_cs *cs)
>>>>>    {
>>>>>        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>>>>        struct amdgpu_vm *vm = &fpriv->vm;
>>>>> -    union drm_amdgpu_cs *cs = data;
>>>>>        uint64_t *chunk_array_user;
>>>>>        uint64_t *chunk_array;
>>>>>        unsigned size, num_ibs = 0;
>>>>> @@ -164,6 +187,19 @@ static int amdgpu_cs_parser_init(struct
>>>>> amdgpu_cs_parser *p, void *data)
>>>>>                   break;
>>>>>    +        case AMDGPU_CHUNK_ID_BO_HANDLES:
>>>>> +            size = sizeof(struct drm_amdgpu_bo_list_in);
>>>>> +            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>>>>> +                ret = -EINVAL;
>>>>> +                goto free_partial_kdata;
>>>>> +            }
>>>>> +
>>>>> +            ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
>>>>> +            if (ret)
>>>>> +                goto free_partial_kdata;
>>>>> +
>>>>> +            break;
>>>>> +
>>>>>            case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>>>>            case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>>>>            case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>>>>> @@ -534,7 +570,12 @@ static int amdgpu_cs_parser_bos(struct
>>>>> amdgpu_cs_parser *p,
>>>>>           INIT_LIST_HEAD(&p->validated);
>>>>>    -    p->bo_list = amdgpu_bo_list_get(fpriv, 
>>>>> cs->in.bo_list_handle);
>>>>> +    /* p->bo_list could already be assigned if 
>>>>> AMDGPU_CHUNK_ID_BO_HANDLES is present */
>>>>> +    if (!p->bo_list)
>>>>> +        p->bo_list = amdgpu_bo_list_get(fpriv, 
>>>>> cs->in.bo_list_handle);
>>>>> +    else
>>>>> +        mutex_lock(&p->bo_list->lock);
>>>>> +
>>>>>        if (p->bo_list) {
>>>>>            amdgpu_bo_list_get_list(p->bo_list, &p->validated);
>>>>>            if (p->bo_list->first_userptr != 
>>>>> p->bo_list->num_entries) @@
>>>>> -747,7 +788,7 @@ static int amdgpu_cs_sync_rings(struct 
>>>>> amdgpu_cs_parser *p)
>>>>>     * used by parsing context.
>>>>>     **/
>>>>>    static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
>>>>> *parser, int error,
>>>>> -                  bool backoff)
>>>>> +                  bool backoff, int id)
>>>> Don't need it after you get bo_list without idr.
>>>>
>>>>>    {
>>>>>        unsigned i;
>>>>>    @@ -1277,7 +1318,7 @@ int amdgpu_cs_ioctl(struct drm_device 
>>>>> *dev, void *data, struct drm_file *filp)
>>>>>        r = amdgpu_cs_submit(&parser, cs);
>>>>>       out:
>>>>> -    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>>>> +    amdgpu_cs_parser_fini(&parser, r, reserved_buffers,
>>>>> +cs->in.bo_list_handle);
>>>> Don't need it after you get bo_list without idr.
>>>>
>>>> Otherwise it looks really good to me, Reviewed-by: Chunming Zhou 
>>>> <david1.zhou@amd.com>
>>>>
>>>>>        return r;
>>>>>    }
>>>>>    diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> index 06aede1..529500c 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>>> @@ -69,9 +69,10 @@
>>>>>     * - 3.24.0 - Add high priority compute support for gfx9
>>>>>     * - 3.25.0 - Add support for sensor query info (stable pstate 
>>>>> sclk/mclk).
>>>>>     * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
>>>>> + * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST 
>>>>> creation.
>>>>>     */
>>>>>    #define KMS_DRIVER_MAJOR    3
>>>>> -#define KMS_DRIVER_MINOR    26
>>>>> +#define KMS_DRIVER_MINOR    27
>>>>>    #define KMS_DRIVER_PATCHLEVEL    0
>>>>>       int amdgpu_vram_limit = 0;
>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h
>>>>> b/include/uapi/drm/amdgpu_drm.h index 09741ba..94444ee 100644
>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>> @@ -519,6 +519,7 @@ struct drm_amdgpu_gem_va {
>>>>>    #define AMDGPU_CHUNK_ID_DEPENDENCIES    0x03
>>>>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
>>>>>    #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
>>>>> +#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
>>>>>       struct drm_amdgpu_cs_chunk {
>>>>>        __u32        chunk_id;
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-07-12  8:14 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-11 20:57 [PATCH v2] drm/amdgpu: Allow to create BO lists in CS ioctl v2 Andrey Grodzovsky
     [not found] ` <1531342678-11142-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2018-07-12  2:31   ` zhoucm1
     [not found]     ` <9060133a-b04f-0c59-e806-1c0d407bb56c-5C7GfCeVMHo@public.gmane.org>
2018-07-12  3:09       ` Zhou, David(ChunMing)
     [not found]         ` <BY1PR12MB050202D0A4883EDB709D434EB4590-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-07-12  4:02           ` zhoucm1
     [not found]             ` <a43a5f3d-7fef-1721-307e-9135cf494264-5C7GfCeVMHo@public.gmane.org>
2018-07-12  4:21               ` zhoucm1
     [not found]                 ` <f6b77415-b991-6d4e-d872-934a269c397e-5C7GfCeVMHo@public.gmane.org>
2018-07-12  7:56                   ` Christian König
     [not found]                     ` <18352936-c661-dccd-a515-6d2845172470-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-07-12  8:14                       ` zhoucm1
2018-07-12  7:09   ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.