* [rfc] amdgpu/sync_file shared semaphores
From: Dave Airlie @ 2017-03-14  0:50 UTC
  To: amd-gfx, dri-devel

This contains one libdrm patch and 4 kernel patches.

The goal is to implement the Vulkan VK_KHR_external_semaphore_fd
interface with permanent semaphore behaviour for radv.

This code enhances sync_file to add the required behaviour (which is
mostly the ability to replace the fence backing the sync file). It also
introduces a new amdgpu API to create/destroy/export/import the
sync_files, which we store in an idr there rather than as fds.

There is a possibility we should share the amdgpu_sem object tracking
code with other drivers, and maybe we should move that into sync_file
as well, but I'm open to suggestions on whether this is a useful
approach for other drivers.

Dave.

* [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
From: Dave Airlie @ 2017-03-14  0:50 UTC
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

This adds the corresponding code for libdrm to use the new
kernel interfaces for semaphores.

This will be used by radv to implement shared semaphores.

TODO: Version checks.
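
For illustration only (not part of this patch): a submission that waits
on one semaphore and signals another would look roughly like the sketch
below, assuming context, ibs_request, wait_handle and signal_handle
have already been set up by the caller:

    uint32_t wait_sems[] = { wait_handle };
    uint32_t signal_sems[] = { signal_handle };
    struct amdgpu_cs_request_sem sem_req = {
        .number_of_wait_sem = 1,
        .wait_sems = wait_sems,
        .number_of_signal_sem = 1,
        .signal_sems = signal_sems,
    };

    int r = amdgpu_cs_submit_sem(context, 0, &ibs_request, &sem_req, 1);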

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 amdgpu/amdgpu.h          |  28 +++++++++
 amdgpu/amdgpu_cs.c       | 161 ++++++++++++++++++++++++++++++++++++++++++++---
 include/drm/amdgpu_drm.h |  28 +++++++++
 3 files changed, 208 insertions(+), 9 deletions(-)

diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 7b26a04..747e248 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -129,6 +129,8 @@ typedef struct amdgpu_va *amdgpu_va_handle;
  */
 typedef struct amdgpu_semaphore *amdgpu_semaphore_handle;
 
+typedef uint32_t amdgpu_sem_handle;
+
 /*--------------------------------------------------------------------------*/
 /* -------------------------- Structures ---------------------------------- */
 /*--------------------------------------------------------------------------*/
@@ -365,6 +367,16 @@ struct amdgpu_cs_request {
 	struct amdgpu_cs_fence_info fence_info;
 };
 
+struct amdgpu_cs_request_sem {
+	/* semaphore handles to wait on before executing the request,
+	 * and to signal once the request's fence is available.
+	 */
+	uint32_t number_of_wait_sem;
+	uint32_t *wait_sems;
+	uint32_t number_of_signal_sem;
+	uint32_t *signal_sems;
+};
+
 /**
  * Structure which provide information about GPU VM MC Address space
  * alignments requirements
@@ -882,6 +894,12 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
 		     struct amdgpu_cs_request *ibs_request,
 		     uint32_t number_of_requests);
 
+int amdgpu_cs_submit_sem(amdgpu_context_handle context,
+			 uint64_t flags,
+			 struct amdgpu_cs_request *ibs_request,
+			 struct amdgpu_cs_request_sem *ibs_sem,
+			 uint32_t number_of_requests);
+
 /**
  *  Query status of Command Buffer Submission
  *
@@ -1255,4 +1273,14 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
 */
 const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);
 
+int amdgpu_cs_create_sem(amdgpu_device_handle dev,
+			 amdgpu_sem_handle *sem);
+int amdgpu_cs_export_sem(amdgpu_device_handle dev,
+			  amdgpu_sem_handle sem,
+			 int *shared_handle);
+int amdgpu_cs_import_sem(amdgpu_device_handle dev,
+			  int shared_handle,
+			 amdgpu_sem_handle *sem);
+int amdgpu_cs_destroy_sem(amdgpu_device_handle dev,
+			  amdgpu_sem_handle sem);
 #endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_cs.c b/amdgpu/amdgpu_cs.c
index fb5b3a8..7283327 100644
--- a/amdgpu/amdgpu_cs.c
+++ b/amdgpu/amdgpu_cs.c
@@ -170,7 +170,8 @@ int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
  * \sa amdgpu_cs_submit()
 */
 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
-				struct amdgpu_cs_request *ibs_request)
+				struct amdgpu_cs_request *ibs_request,
+				struct amdgpu_cs_request_sem *sem_request)
 {
 	union drm_amdgpu_cs cs;
 	uint64_t *chunk_array;
@@ -178,9 +179,11 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 	struct drm_amdgpu_cs_chunk_data *chunk_data;
 	struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
 	struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
+	struct drm_amdgpu_cs_chunk_sem *wait_sem_dependencies = NULL;
+	struct drm_amdgpu_cs_chunk_sem *signal_sem_dependencies = NULL;
 	struct list_head *sem_list;
 	amdgpu_semaphore_handle sem, tmp;
-	uint32_t i, size, sem_count = 0;
+	uint32_t i, j, size, sem_count = 0;
 	bool user_fence;
 	int r = 0;
 
@@ -196,7 +199,7 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 	}
 	user_fence = (ibs_request->fence_info.handle != NULL);
 
-	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1;
+	size = ibs_request->number_of_ibs + (user_fence ? 2 : 1) + 1 + (sem_request ? 2 : 0);
 
 	chunk_array = alloca(sizeof(uint64_t) * size);
 	chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
@@ -308,6 +311,45 @@ static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 		chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
 	}
 
+	if (sem_request) {
+		if (sem_request->number_of_wait_sem) {
+			wait_sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * sem_request->number_of_wait_sem);
+			if (!wait_sem_dependencies) {
+				r = -ENOMEM;
+				goto error_unlock;
+			}
+			for (j = 0; j < sem_request->number_of_wait_sem; j++) {
+				struct drm_amdgpu_cs_chunk_sem *dep = &wait_sem_dependencies[j];
+				dep->handle = sem_request->wait_sems[j];
+			}
+			i = cs.in.num_chunks++;
+
+			/* wait semaphores chunk */
+			chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+			chunks[i].chunk_id = AMDGPU_CHUNK_ID_SEM_WAIT;
+			chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * sem_request->number_of_wait_sem;
+			chunks[i].chunk_data = (uint64_t)(uintptr_t)wait_sem_dependencies;
+		}
+		if (sem_request->number_of_signal_sem) {
+			signal_sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * sem_request->number_of_signal_sem);
+			if (!signal_sem_dependencies) {
+				r = -ENOMEM;
+				goto error_unlock;
+			}
+			for (j = 0; j < sem_request->number_of_signal_sem; j++) {
+				struct drm_amdgpu_cs_chunk_sem *dep = &signal_sem_dependencies[j];
+				dep->handle = sem_request->signal_sems[j];
+			}
+			i = cs.in.num_chunks++;
+
+			/* signal semaphores chunk */
+			chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
+			chunks[i].chunk_id = AMDGPU_CHUNK_ID_SEM_SIGNAL;
+			chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * sem_request->number_of_signal_sem;
+			chunks[i].chunk_data = (uint64_t)(uintptr_t)signal_sem_dependencies;
+		}
+	}
+
 	r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
 				&cs, sizeof(cs));
 	if (r)
@@ -319,17 +361,20 @@ error_unlock:
 	pthread_mutex_unlock(&context->sequence_mutex);
 	free(dependencies);
 	free(sem_dependencies);
+	free(wait_sem_dependencies);
+	free(signal_sem_dependencies);
 	return r;
 }
 
-int amdgpu_cs_submit(amdgpu_context_handle context,
-		     uint64_t flags,
-		     struct amdgpu_cs_request *ibs_request,
-		     uint32_t number_of_requests)
+int amdgpu_cs_submit_sem(amdgpu_context_handle context,
+			 uint64_t flags,
+			 struct amdgpu_cs_request *ibs_request,
+			 struct amdgpu_cs_request_sem *ibs_sem,
+			 uint32_t number_of_requests)
 {
 	uint32_t i;
 	int r;
-
+	bool has_sems = ibs_sem ? true : false;
 	if (NULL == context)
 		return -EINVAL;
 	if (NULL == ibs_request)
@@ -337,15 +382,28 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
 
 	r = 0;
 	for (i = 0; i < number_of_requests; i++) {
-		r = amdgpu_cs_submit_one(context, ibs_request);
+		r = amdgpu_cs_submit_one(context, ibs_request, has_sems ? ibs_sem : NULL);
 		if (r)
 			break;
 		ibs_request++;
+		if (has_sems)
+			ibs_sem++;
 	}
 
 	return r;
 }
 
+int amdgpu_cs_submit(amdgpu_context_handle context,
+		     uint64_t flags,
+		     struct amdgpu_cs_request *ibs_request,
+		     uint32_t number_of_requests)
+{
+	return amdgpu_cs_submit_sem(context, flags,
+				    ibs_request, NULL,
+				    number_of_requests);
+}
+
+
 /**
  * Calculate absolute timeout.
  *
@@ -542,3 +600,88 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem)
 {
 	return amdgpu_cs_unreference_sem(sem);
 }
+
+
+int amdgpu_cs_create_sem(amdgpu_device_handle dev,
+			 amdgpu_sem_handle *sem)
+{
+	union drm_amdgpu_sem args;
+	int r;
+
+	if (NULL == dev)
+		return -EINVAL;
+
+	/* Create the semaphore */
+	memset(&args, 0, sizeof(args));
+	args.in.op = AMDGPU_SEM_OP_CREATE_SEM;
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_SEM, &args, sizeof(args));
+	if (r)
+		return r;
+
+	*sem = args.out.handle;
+
+	return 0;
+}
+
+int amdgpu_cs_export_sem(amdgpu_device_handle dev,
+			  amdgpu_sem_handle sem,
+			  int *shared_handle)
+{
+	union drm_amdgpu_sem args;
+	int r;
+
+	if (NULL == dev)
+		return -EINVAL;
+
+	/* Export the semaphore */
+	memset(&args, 0, sizeof(args));
+	args.in.op = AMDGPU_SEM_OP_EXPORT_SEM;
+	args.in.handle = sem;
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_SEM, &args, sizeof(args));
+	if (r)
+		return r;
+	*shared_handle = args.out.fd;
+	return 0;
+}
+
+int amdgpu_cs_import_sem(amdgpu_device_handle dev,
+			  int shared_handle,
+			  amdgpu_sem_handle *sem)
+{
+	union drm_amdgpu_sem args;
+	int r;
+
+	if (NULL == dev)
+		return -EINVAL;
+
+	/* Import the semaphore */
+	memset(&args, 0, sizeof(args));
+	args.in.op = AMDGPU_SEM_OP_IMPORT_SEM;
+	args.in.handle = shared_handle;
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_SEM, &args, sizeof(args));
+	if (r)
+		return r;
+	*sem = args.out.handle;
+	return 0;
+}
+
+
+int amdgpu_cs_destroy_sem(amdgpu_device_handle dev,
+			  amdgpu_sem_handle sem)
+{
+	union drm_amdgpu_sem args;
+	int r;
+
+	if (NULL == dev)
+		return -EINVAL;
+
+	/* Destroy the semaphore */
+	memset(&args, 0, sizeof(args));
+	args.in.op = AMDGPU_SEM_OP_DESTROY_SEM;
+	args.in.handle = sem;
+	r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_SEM, &args, sizeof(args));
+	if (r)
+		return r;
+
+	return 0;
+}
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index d8f2497..fa0bfe2 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -50,6 +50,7 @@ extern "C" {
 #define DRM_AMDGPU_WAIT_CS		0x09
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
+#define DRM_AMDGPU_SEM                  0x13
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -63,6 +64,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_WAIT_CS	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs)
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -303,6 +305,26 @@ union drm_amdgpu_wait_cs {
 	struct drm_amdgpu_wait_cs_out out;
 };
 
+#define AMDGPU_SEM_OP_CREATE_SEM 0
+#define AMDGPU_SEM_OP_IMPORT_SEM 1
+#define AMDGPU_SEM_OP_EXPORT_SEM 2
+#define AMDGPU_SEM_OP_DESTROY_SEM 3
+
+struct drm_amdgpu_sem_in {
+	__u32 op;
+	__u32 handle;
+};
+
+struct drm_amdgpu_sem_out {
+	__u32 fd;
+	__u32 handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	struct drm_amdgpu_sem_out out;
+};
+
 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
 #define AMDGPU_GEM_OP_SET_PLACEMENT		1
 
@@ -358,6 +380,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB		0x01
 #define AMDGPU_CHUNK_ID_FENCE		0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
+#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
 
 struct drm_amdgpu_cs_chunk {
 	uint32_t		chunk_id;
@@ -422,6 +446,10 @@ struct drm_amdgpu_cs_chunk_fence {
 	uint32_t offset;
 };
 
+struct drm_amdgpu_cs_chunk_sem {
+	uint32_t handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
 	union {
 		struct drm_amdgpu_cs_chunk_ib		ib_data;
-- 
2.7.4

* [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
From: Dave Airlie @ 2017-03-14  0:50 UTC
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

This isn't needed currently, but to reuse sync_file for the Vulkan
permanent shared semaphore semantics, we need to be able to swap
the fence backing a sync file. This patch adds a mutex to the
sync file and uses it to protect accesses to the fence and cb members.
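
To illustrate what the lock is for (the replace helper itself only
arrives in the next patch), the swap has to detach any poll callback
and exchange the fence atomically with respect to concurrent poll and
ioctl users; a sketch of the critical section:

    struct dma_fence *old;

    mutex_lock(&sync_file->lock);
    old = sync_file->fence;
    if (old && test_bit(POLL_ENABLED, &old->flags))
        dma_fence_remove_callback(old, &sync_file->cb);
    sync_file->fence = dma_fence_get(new_fence);
    mutex_unlock(&sync_file->lock);

    dma_fence_put(old); /* drop the reference the sync_file held */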

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/dma-buf/sync_file.c | 23 +++++++++++++++++++----
 include/linux/sync_file.h   |  3 +++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035..105f48c 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -47,6 +47,7 @@ static struct sync_file *sync_file_alloc(void)
 
 	INIT_LIST_HEAD(&sync_file->cb.node);
 
+	mutex_init(&sync_file->lock);
 	return sync_file;
 
 err:
@@ -204,10 +205,13 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 	if (!sync_file)
 		return NULL;
 
+	mutex_lock(&a->lock);
+	mutex_lock(&b->lock);
 	a_fences = get_fences(a, &a_num_fences);
 	b_fences = get_fences(b, &b_num_fences);
-	if (a_num_fences > INT_MAX - b_num_fences)
-		return NULL;
+	if (a_num_fences > INT_MAX - b_num_fences) {
+		goto unlock;
+	}
 
 	num_fences = a_num_fences + b_num_fences;
 
@@ -268,11 +272,17 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		goto err;
 	}
 
+	mutex_unlock(&b->lock);
+	mutex_unlock(&a->lock);
+
 	strlcpy(sync_file->name, name, sizeof(sync_file->name));
 	return sync_file;
 
 err:
 	fput(sync_file->file);
+unlock:
+	mutex_unlock(&b->lock);
+	mutex_unlock(&a->lock);
 	return NULL;
 
 }
@@ -299,16 +309,20 @@ static int sync_file_release(struct inode *inode, struct file *file)
 static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 {
 	struct sync_file *sync_file = file->private_data;
+	unsigned int ret_val;
 
 	poll_wait(file, &sync_file->wq, wait);
 
+	mutex_lock(&sync_file->lock);
 	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
 		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
 					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);
 	}
+	ret_val = dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
+	mutex_unlock(&sync_file->lock);
 
-	return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
+	return ret_val;
 }
 
 static long sync_file_ioctl_merge(struct sync_file *sync_file,
@@ -393,6 +407,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 	if (info.flags || info.pad)
 		return -EINVAL;
 
+	mutex_lock(&sync_file->lock);
 	fences = get_fences(sync_file, &num_fences);
 
 	/*
@@ -433,7 +448,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 
 out:
 	kfree(fence_info);
-
+	mutex_unlock(&sync_file->lock);
 	return ret;
 }
 
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 3e3ab84..5aef17f 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -30,6 +30,7 @@
  * @wq:			wait queue for fence signaling
  * @fence:		fence with the fences in the sync_file
  * @cb:			fence callback information
+ * @lock:               mutex to protect fence/cb - used for semaphores
  */
 struct sync_file {
 	struct file		*file;
@@ -43,6 +44,8 @@ struct sync_file {
 
 	struct dma_fence	*fence;
 	struct dma_fence_cb cb;
+	/* protects the fence pointer and cb */
+	struct mutex lock;
 };
 
 #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
-- 
2.7.4

* [PATCH 2/4] sync_file: add replace and export some functionality
From: Dave Airlie @ 2017-03-14  0:50 UTC
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

Using sync_file to back Vulkan semaphores means we need to replace
the fence underlying the sync file. This replace function removes
the callback, swaps the fence, and returns the old one. This patch
also exports the alloc and fdget functionality for the semaphore
wrapper code.
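
As an illustrative sketch (not part of this patch), a driver wrapping a
semaphore around a sync_file would use the new export roughly as
follows, where fence is whatever the driver's submission produced:

    /* signal: make the semaphore carry the new submission's fence */
    struct dma_fence *old = sync_file_replace_fence(sync_file, fence);
    dma_fence_put(old);

    /* wait: consume the current fence, leaving the semaphore unsignalled */
    struct dma_fence *f = sync_file_replace_fence(sync_file, NULL);
    /* ... add f as a scheduler dependency, then dma_fence_put(f) ... */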

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/dma-buf/sync_file.c | 46 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/sync_file.h   |  5 ++++-
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 105f48c..df7bb37 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -28,7 +28,14 @@
 
 static const struct file_operations sync_file_fops;
 
-static struct sync_file *sync_file_alloc(void)
+/**
+ * sync_file_alloc() - allocate an unfenced sync file
+ *
+ * Creates a sync_file.
+ * The sync_file can be released with fput(sync_file->file).
+ * Returns the sync_file or NULL in case of error.
+ */
+struct sync_file *sync_file_alloc(void)
 {
 	struct sync_file *sync_file;
 
@@ -54,6 +61,7 @@ static struct sync_file *sync_file_alloc(void)
 	kfree(sync_file);
 	return NULL;
 }
+EXPORT_SYMBOL(sync_file_alloc);
 
 static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb)
 {
@@ -92,7 +100,7 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
 }
 EXPORT_SYMBOL(sync_file_create);
 
-static struct sync_file *sync_file_fdget(int fd)
+struct sync_file *sync_file_fdget(int fd)
 {
 	struct file *file = fget(fd);
 
@@ -108,6 +116,7 @@ static struct sync_file *sync_file_fdget(int fd)
 	fput(file);
 	return NULL;
 }
+EXPORT_SYMBOL(sync_file_fdget);
 
 /**
  * sync_file_get_fence - get the fence related to the sync_file fd
@@ -125,13 +134,40 @@ struct dma_fence *sync_file_get_fence(int fd)
 	if (!sync_file)
 		return NULL;
 
+	mutex_lock(&sync_file->lock);
 	fence = dma_fence_get(sync_file->fence);
+	mutex_unlock(&sync_file->lock);
 	fput(sync_file->file);
 
 	return fence;
 }
 EXPORT_SYMBOL(sync_file_get_fence);
 
+/**
+ * sync_file_replace_fence - replace the fence related to the sync_file
+ * @sync_file:	 sync file to replace fence in
+ * @fence: fence to replace with (or NULL for no fence).
+ * Returns previous fence.
+ */
+struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
+					  struct dma_fence *fence)
+{
+	struct dma_fence *ret_fence = NULL;
+	mutex_lock(&sync_file->lock);
+	if (sync_file->fence) {
+		if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
+			dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
+		ret_fence = sync_file->fence;
+	}
+	if (fence)
+		sync_file->fence = dma_fence_get(fence);
+	else
+		sync_file->fence = NULL;
+	mutex_unlock(&sync_file->lock);
+	return ret_fence;
+}
+EXPORT_SYMBOL(sync_file_replace_fence);
+
 static int sync_file_set_fence(struct sync_file *sync_file,
 			       struct dma_fence **fences, int num_fences)
 {
@@ -292,8 +328,10 @@ static void sync_file_free(struct kref *kref)
 	struct sync_file *sync_file = container_of(kref, struct sync_file,
 						     kref);
 
-	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
-		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
+	if (sync_file->fence) {
+		if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
+			dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
+	}
 	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
 }
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 5aef17f..9511a54 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -50,7 +50,10 @@ struct sync_file {
 
 #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
 
+struct sync_file *sync_file_alloc(void);
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);
-
+struct sync_file *sync_file_fdget(int fd);
+struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
+					  struct dma_fence *fence);
 #endif /* _LINUX_SYNC_H */
-- 
2.7.4

* [PATCH 3/4] amdgpu/cs: split out fence dependency checking
From: Dave Airlie @ 2017-03-14  0:50 UTC
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

This just splits the fence dependency checking out into its
own function to make it easier to add semaphore dependencies.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 +++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 99424cb..4671432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -963,56 +963,66 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
+				    struct amdgpu_cs_parser *p,
+				    struct amdgpu_cs_chunk *chunk)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		r = amdgpu_cs_get_ring(adev, deps[i].ip_type,
+				       deps[i].ip_instance,
+				       deps[i].ring, &ring);
+		if (r)
+			return r;
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 				return r;
+		}
+	}
+	return 0;
+}
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_process_fence_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
-- 
2.7.4

* [PATCH 4/4] amdgpu: use sync file for shared semaphores
From: Dave Airlie @ 2017-03-14  0:50 UTC
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

This creates a new interface for amdgpu with ioctls to
create/destroy/import and export shared semaphores using a
sem object backed by the sync_file code. The semaphores
are not installed as fds (except on export), but are instead
stored, like other driver-internal objects, in an idr. The idr
holds the initial reference on the sync file.

The command submission interface is enhanced with two new
chunks, one for semaphore waiting and one for semaphore signalling,
each of which just takes a list of handles.

This is based on work originally done by David Zhou at AMD,
with input from Christian König on what things should look like.

NOTE: this interface addition needs a version bump to expose
it to userspace.
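
For illustration (not part of this patch), userspace builds the new
chunks the same way as the existing ones; a wait on a single semaphore
handle, for example, is roughly:

    struct drm_amdgpu_cs_chunk_sem wait_sem = { .handle = sem_handle };
    struct drm_amdgpu_cs_chunk chunk = {
        .chunk_id = AMDGPU_CHUNK_ID_SEM_WAIT,
        .length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4,
        .chunk_data = (uint64_t)(uintptr_t)&wait_sem,
    };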

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  28 +++++
 6 files changed, 322 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..404bcba 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b9135..4ed8811 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -702,6 +702,8 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	spinlock_t		sem_handles_lock;
+	struct idr		sem_handles;
 };
 
 /*
@@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo);
 int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
 
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp);
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence);
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle);
 #include "amdgpu_object.h"
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4671432..80fc94b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SEM_WAIT:
+		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
 			break;
 
 		default:
@@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
+				       struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
+					       deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 				  struct amdgpu_cs_parser *p)
 {
@@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			r = amdgpu_process_fence_dep(adev, p, chunk);
 			if (r)
 				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
+			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
+					 struct amdgpu_cs_chunk *chunk,
+					 struct dma_fence *fence)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
+						 fence);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
+			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
+			if (r)
+				return r;
+		}
+	}
+	return 0;
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
@@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
-	return 0;
+	return amdgpu_cs_post_dependencies(p);
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 61d94c7..013aed1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	mutex_init(&fpriv->bo_list_lock);
 	idr_init(&fpriv->bo_list_handles);
 
+	spin_lock_init(&fpriv->sem_handles_lock);
+	idr_init(&fpriv->sem_handles);
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 	file_priv->driver_priv = fpriv;
@@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
+	struct amdgpu_sem *sem;
 	int handle;
 
 	if (!fpriv)
@@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	idr_destroy(&fpriv->bo_list_handles);
 	mutex_destroy(&fpriv->bo_list_lock);
 
+	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
+		amdgpu_sem_destroy(fpriv, handle);
+	idr_destroy(&fpriv->sem_handles);
+
 	kfree(fpriv);
 	file_priv->driver_priv = NULL;
 
@@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
new file mode 100644
index 0000000..066520a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chunming Zhou <david1.zhou@amd.com>
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+#include <linux/sync_file.h>
+#include "amdgpu.h"
+#include <drm/drmP.h>
+
+static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file;
+
+	spin_lock(&fpriv->sem_handles_lock);
+
+	/* Check if we currently have a reference on the object */
+	sync_file = idr_find(&fpriv->sem_handles, handle);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	return sync_file;
+}
+
+static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_alloc();
+	int ret;
+
+	if (!sync_file)
+		return -ENOMEM;
+
+	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
+
+	/* we get a file reference and we use that in the idr. */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		return ret;
+
+	*handle = ret;
+	return 0;
+}
+
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return;
+
+	spin_lock(&fpriv->sem_handles_lock);
+	idr_remove(&fpriv->sem_handles, handle);
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	fput(sync_file->file);
+}
+
+
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence)
+{
+	struct sync_file *sync_file;
+	struct dma_fence *old_fence;
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	old_fence = sync_file_replace_fence(sync_file, fence);
+	dma_fence_put(old_fence);
+	return 0;
+}
+
+static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
+				       int fd, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_fdget(fd);
+	int ret;
+
+	if (!sync_file)
+		return -EINVAL;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	/* on success the idr keeps the reference taken by sync_file_fdget() */
+	if (ret < 0) {
+		fput(sync_file->file);
+		return ret;
+	}
+
+	*handle = ret;
+	return 0;
+
+}
+
+static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
+			     u32 handle, int *fd)
+{
+	struct sync_file *sync_file;
+	int ret;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0)
+		return ret;
+
+	/* fd_install() consumes a file reference of its own; the idr
+	 * keeps the original one, so take an extra reference here. */
+	get_file(sync_file->file);
+	fd_install(ret, sync_file->file);
+
+	*fd = ret;
+	return 0;
+}
+
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle)
+{
+	int r;
+	struct sync_file *sync_file;
+	struct dma_fence *fence;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	fence = sync_file_replace_fence(sync_file, NULL);
+	r = amdgpu_sync_fence(adev, sync, fence);
+	dma_fence_put(fence);
+
+	return r;
+}
+
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp)
+{
+	union drm_amdgpu_sem *args = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	int r = 0;
+
+	switch (args->in.op) {
+	case AMDGPU_SEM_OP_CREATE_SEM:
+		r = amdgpu_sem_create(fpriv, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_IMPORT_SEM:
+		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_EXPORT_SEM:
+		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
+		break;
+	case AMDGPU_SEM_OP_DESTROY_SEM:
+		amdgpu_sem_destroy(fpriv, args->in.handle);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5797283..646b103 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -51,6 +51,7 @@ extern "C" {
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
 #define DRM_AMDGPU_WAIT_FENCES		0x12
+#define DRM_AMDGPU_SEM                  0x13
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -65,6 +66,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
 	struct drm_amdgpu_wait_fences_out out;
 };
 
+#define AMDGPU_SEM_OP_CREATE_SEM 0
+#define AMDGPU_SEM_OP_IMPORT_SEM 1
+#define AMDGPU_SEM_OP_EXPORT_SEM 2
+#define AMDGPU_SEM_OP_DESTROY_SEM 3
+
+struct drm_amdgpu_sem_in {
+	__u32 op;
+	__u32 handle;
+};
+
+struct drm_amdgpu_sem_out {
+	__u32 fd;
+	__u32 handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	struct drm_amdgpu_sem_out out;
+};
+
 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
 #define AMDGPU_GEM_OP_SET_PLACEMENT		1
 
@@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB		0x01
 #define AMDGPU_CHUNK_ID_FENCE		0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
+#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
 	__u32 offset;
 };
 
+struct drm_amdgpu_cs_chunk_sem {
+	__u32 handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
 	union {
 		struct drm_amdgpu_cs_chunk_ib		ib_data;
-- 
2.7.4

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
From: zhoucm1 @ 2017-03-14  2:00 UTC
  To: Dave Airlie, amd-gfx, dri-devel, Mao, David

Hi Dave,

Could you tell me why you created a new patch of your own? I remember I
sent out our whole implementation. Why not directly review our patches?

Thanks,
David Zhou

On 14 March 2017 at 08:50, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
>
> This adds the corresponding code for libdrm to use the new
> kernel interfaces for semaphores.
>
> This will be used by radv to implement shared semaphores.
>
> [full patch quoted upthread, trimmed]

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
From: Dave Airlie @ 2017-03-14  2:52 UTC
  To: zhoucm1; +Cc: dri-devel, amd-gfx mailing list, Mao, David

On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
> Hi Dave,
>
> Could you tell me why you created a new patch of your own? I remember I
> sent out our whole implementation. Why not directly review our patches?

This is patch review, I'm not sure what you are expecting in terms of
direct review.

The patches you sent out were reviewed by Christian, he stated he
thinks this should
use sync_file, I was interested to see if this was actually possible,
so I just adapted
the patches to check if it was possible to avoid adding a lot of amd
specific code
for something that isn't required to be. Hence why I've sent this as
an rfc, as I want
to see if others think using sync_file is a good or bad idea. We may
end up going
back to the non-sync_file based patches if this proves to be a bad
idea, so far it
doesn't look like one.

I also reviewed the patches and noted it shouldn't add the wait/signal
interfaces,
that it should use the chunks on command submission, so while I was in
there I rewrote that as well.

Dave.
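
To make the chunk-based interface concrete: rather than separate
wait/signal ioctls, a submission carries extra chunks. A sketch using only
the chunk IDs and structs from the libdrm patch; the IB chunk setup is the
usual one and is assumed filled in elsewhere.

    /* Sketch: expressing a semaphore wait as a CS chunk rather than a
     * separate ioctl.  AMDGPU_CHUNK_ID_SEM_SIGNAL works the same way. */
    #include <stdint.h>
    #include <amdgpu_drm.h>

    static void fill_sem_wait_chunks(struct drm_amdgpu_cs_chunk chunks[2],
                                     struct drm_amdgpu_cs_chunk_ib *ib_info,
                                     struct drm_amdgpu_cs_chunk_sem *wait_sem,
                                     uint32_t sem_handle)
    {
        wait_sem->handle = sem_handle;          /* from CREATE/IMPORT_SEM */

        chunks[0].chunk_id   = AMDGPU_CHUNK_ID_IB;
        chunks[0].length_dw  = sizeof(*ib_info) / 4;
        chunks[0].chunk_data = (uintptr_t)ib_info;

        chunks[1].chunk_id   = AMDGPU_CHUNK_ID_SEM_WAIT;
        chunks[1].length_dw  = sizeof(*wait_sem) / 4;
        chunks[1].chunk_data = (uintptr_t)wait_sem;
        /* chunks[] then goes into the normal union drm_amdgpu_cs submission */
    }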

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]         ` <CAPM=9tzegdtiyEmbW+PqmVeQ+bXrNtrdKXuE3kB0dPMKnMir+Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-03-14  3:30           ` zhoucm1
       [not found]             ` <58C763E0.1-5C7GfCeVMHo@public.gmane.org>
  2017-03-14  8:54             ` Daniel Vetter
  0 siblings, 2 replies; 40+ messages in thread
From: zhoucm1 @ 2017-03-14  3:30 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel, amd-gfx mailing list, Mao, David



On 2017-03-14 10:52, Dave Airlie wrote:
> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>> Hi Dave,
>>
>> Could you tell me why you create your new one patch? I remember I send out
>> our the whole implementation, Why not directly review our patches?
> This is patch review, I'm not sure what you are expecting in terms of
> direct review.
>
> The patches you sent out were reviewed by Christian, he stated he
> thinks this should
> use sync_file, I was interested to see if this was actually possible,
> so I just adapted
> the patches to check if it was possible to avoid adding a lot of amd
> specific code
> for something that isn't required to be. Hence why I've sent this as
> an rfc, as I want
> to see if others think using sync_file is a good or bad idea. We may
> end up going
> back to the non-sync_file based patches if this proves to be a bad
> idea, so far it
> doesn't look like one.
>
> I also reviewed the patches and noted it shouldn't add the wait/signal
> interfaces,
> that it should use the chunks on command submission, so while I was in
> there I re
> wrote that as well.
Yeah, then I'm ok with this; it's just that our internal team has already 
used this implementation. They find some gaps between yours and the 
previous one, so they may need to change theirs again, which is an 
annoyance for them.

Regards,
David Zhou
>
> Dave.


* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]             ` <58C763E0.1-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-14  4:16               ` Dave Airlie
  2017-03-14  8:56                 ` Daniel Vetter
  0 siblings, 1 reply; 40+ messages in thread
From: Dave Airlie @ 2017-03-14  4:16 UTC (permalink / raw)
  To: zhoucm1; +Cc: dri-devel, amd-gfx mailing list, Mao, David

On 14 March 2017 at 13:30, zhoucm1 <david1.zhou@amd.com> wrote:
>
>
> On 2017年03月14日 10:52, Dave Airlie wrote:
>>
>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>
>>> Hi Dave,
>>>
>>> Could you tell me why you create your new one patch? I remember I send
>>> out
>>> our the whole implementation, Why not directly review our patches?
>>
>> This is patch review, I'm not sure what you are expecting in terms of
>> direct review.
>>
>> The patches you sent out were reviewed by Christian, he stated he
>> thinks this should
>> use sync_file, I was interested to see if this was actually possible,
>> so I just adapted
>> the patches to check if it was possible to avoid adding a lot of amd
>> specific code
>> for something that isn't required to be. Hence why I've sent this as
>> an rfc, as I want
>> to see if others think using sync_file is a good or bad idea. We may
>> end up going
>> back to the non-sync_file based patches if this proves to be a bad
>> idea, so far it
>> doesn't look like one.
>>
>> I also reviewed the patches and noted it shouldn't add the wait/signal
>> interfaces,
>> that it should use the chunks on command submission, so while I was in
>> there I re
>> wrote that as well.
>
> Yeah, then I'm ok with this, just our internal team has used this
> implementation, they find some gaps between yours and previous, they could
> need to change their's again, they are annoyance for this.

This is unfortunate, but the more internal development is done, the more
this will happen, especially in areas where you might interact with the
kernel. I'd suggest trying to develop stuff in the open much earlier
(don't start anything in-house).

Now that we have an open vulkan driver, it might be that most features
the internal vulkan driver requires will eventually be wanted by us too.

Dave.

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
       [not found]   ` <20170314005054.7487-3-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-14  8:45     ` Daniel Vetter
  2017-03-14  9:13       ` Christian König
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-14  8:45 UTC (permalink / raw)
  To: Dave Airlie
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Tue, Mar 14, 2017 at 10:50:51AM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> This isn't needed currently, but to reuse sync file for Vulkan
> permanent shared semaphore semantics, we need to be able to swap
> the fence backing a sync file. This patch adds a mutex to the
> sync file and uses to protect accesses to the fence and cb members.
> 
> Signed-off-by: Dave Airlie <airlied@redhat.com>

We've gone to pretty great lengths to rcu-protect all the fence stuff, so
that a peek-only is entirely lockless. Can we haz the same for this pls?

Yes I know it's probably going to be slightly nasty when you get around to
implementing the replacement logic. For just normal fences we can probably
get away with not doing an rcu dance when freeing it, since the
refcounting should protect us already.

But for the replacement you need to have an rcu-delayed fence_put on the
old fences.
-Daniel
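
As a sketch, the lockless peek being asked for could look like the
following, assuming sync_file->fence becomes an RCU-managed pointer
(Chris names the ready-made helper further down the thread):

    /* Sketch: lockless fence peek in place of the mutex. */
    struct dma_fence *fence;

    rcu_read_lock();
    fence = dma_fence_get_rcu_safe(&sync_file->fence); /* ref'd copy or NULL */
    rcu_read_unlock();

    if (fence) {
        /* ... inspect the snapshot, e.g. dma_fence_is_signaled(fence) ... */
        dma_fence_put(fence);
    }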
> ---
>  drivers/dma-buf/sync_file.c | 23 +++++++++++++++++++----
>  include/linux/sync_file.h   |  3 +++
>  2 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> index 2321035..105f48c 100644
> --- a/drivers/dma-buf/sync_file.c
> +++ b/drivers/dma-buf/sync_file.c
> @@ -47,6 +47,7 @@ static struct sync_file *sync_file_alloc(void)
>  
>  	INIT_LIST_HEAD(&sync_file->cb.node);
>  
> +	mutex_init(&sync_file->lock);
>  	return sync_file;
>  
>  err:
> @@ -204,10 +205,13 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
>  	if (!sync_file)
>  		return NULL;
>  
> +	mutex_lock(&a->lock);
> +	mutex_lock(&b->lock);
>  	a_fences = get_fences(a, &a_num_fences);
>  	b_fences = get_fences(b, &b_num_fences);
> -	if (a_num_fences > INT_MAX - b_num_fences)
> -		return NULL;
> +	if (a_num_fences > INT_MAX - b_num_fences) {
> +		goto unlock;
> +	}
>  
>  	num_fences = a_num_fences + b_num_fences;
>  
> @@ -268,11 +272,17 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
>  		goto err;
>  	}
>  
> +	mutex_unlock(&b->lock);
> +	mutex_unlock(&a->lock);
> +
>  	strlcpy(sync_file->name, name, sizeof(sync_file->name));
>  	return sync_file;
>  
>  err:
>  	fput(sync_file->file);
> +unlock:
> +	mutex_unlock(&b->lock);
> +	mutex_unlock(&a->lock);
>  	return NULL;
>  
>  }
> @@ -299,16 +309,20 @@ static int sync_file_release(struct inode *inode, struct file *file)
>  static unsigned int sync_file_poll(struct file *file, poll_table *wait)
>  {
>  	struct sync_file *sync_file = file->private_data;
> +	unsigned int ret_val;
>  
>  	poll_wait(file, &sync_file->wq, wait);
>  
> +	mutex_lock(&sync_file->lock);
>  	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
>  		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
>  					   fence_check_cb_func) < 0)
>  			wake_up_all(&sync_file->wq);
>  	}
> +	ret_val = dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
> +	mutex_unlock(&sync_file->lock);
>  
> -	return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
> +	return ret_val;
>  }
>  
>  static long sync_file_ioctl_merge(struct sync_file *sync_file,
> @@ -393,6 +407,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
>  	if (info.flags || info.pad)
>  		return -EINVAL;
>  
> +	mutex_lock(&sync_file->lock);
>  	fences = get_fences(sync_file, &num_fences);
>  
>  	/*
> @@ -433,7 +448,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
>  
>  out:
>  	kfree(fence_info);
> -
> +	mutex_unlock(&sync_file->lock);
>  	return ret;
>  }
>  
> diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
> index 3e3ab84..5aef17f 100644
> --- a/include/linux/sync_file.h
> +++ b/include/linux/sync_file.h
> @@ -30,6 +30,7 @@
>   * @wq:			wait queue for fence signaling
>   * @fence:		fence with the fences in the sync_file
>   * @cb:			fence callback information
> + * @lock:               mutex to protect fence/cb - used for semaphores
>   */
>  struct sync_file {
>  	struct file		*file;
> @@ -43,6 +44,8 @@ struct sync_file {
>  
>  	struct dma_fence	*fence;
>  	struct dma_fence_cb cb;
> +	/* protects the fence pointer and cb */
> +	struct mutex lock;
>  };
>  
>  #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
> -- 
> 2.7.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 2/4] sync_file: add replace and export some functionality
       [not found]     ` <20170314005054.7487-4-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-14  8:48       ` Daniel Vetter
       [not found]         ` <20170314084851.covfl7sjwn3yadh5-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-14  8:48 UTC (permalink / raw)
  To: Dave Airlie
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Tue, Mar 14, 2017 at 10:50:52AM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> Using sync_file to back vulkan semaphores means need to replace
> the fence underlying the sync file. This replace function removes
> the callback, swaps the fence, and returns the old one. This
> also exports the alloc and fdget functionality for the semaphore
> wrapper code.
> 
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
>  drivers/dma-buf/sync_file.c | 46 +++++++++++++++++++++++++++++++++++++++++----
>  include/linux/sync_file.h   |  5 ++++-
>  2 files changed, 46 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
> index 105f48c..df7bb37 100644
> --- a/drivers/dma-buf/sync_file.c
> +++ b/drivers/dma-buf/sync_file.c
> @@ -28,7 +28,14 @@
>  
>  static const struct file_operations sync_file_fops;
>  
> -static struct sync_file *sync_file_alloc(void)
> +/**
> + * sync_file_alloc() - allocate an unfenced sync file
> + *
> + * Creates a sync_file.
> + * The sync_file can be released with fput(sync_file->file).
> + * Returns the sync_file or NULL in case of error.
> + */
> +struct sync_file *sync_file_alloc(void)
>  {
>  	struct sync_file *sync_file;
>  
> @@ -54,6 +61,7 @@ static struct sync_file *sync_file_alloc(void)
>  	kfree(sync_file);
>  	return NULL;
>  }
> +EXPORT_SYMBOL(sync_file_alloc);
>  
>  static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb)
>  {
> @@ -92,7 +100,7 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
>  }
>  EXPORT_SYMBOL(sync_file_create);
>  
> -static struct sync_file *sync_file_fdget(int fd)
> +struct sync_file *sync_file_fdget(int fd)
>  {
>  	struct file *file = fget(fd);
>  
> @@ -108,6 +116,7 @@ static struct sync_file *sync_file_fdget(int fd)
>  	fput(file);
>  	return NULL;
>  }
> +EXPORT_SYMBOL(sync_file_fdget);
>  
>  /**
>   * sync_file_get_fence - get the fence related to the sync_file fd
> @@ -125,13 +134,40 @@ struct dma_fence *sync_file_get_fence(int fd)
>  	if (!sync_file)
>  		return NULL;
>  
> +	mutex_lock(&sync_file->lock);
>  	fence = dma_fence_get(sync_file->fence);
> +	mutex_unlock(&sync_file->lock);
>  	fput(sync_file->file);
>  
>  	return fence;
>  }
>  EXPORT_SYMBOL(sync_file_get_fence);
>  
> +/**
> + * sync_file_replace_fence - replace the fence related to the sync_file
> + * @sync_file:	 sync file to replace fence in
> + * @fence: fence to replace with (or NULL for no fence).
> + * Returns previous fence.
> + */
> +struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
> +					  struct dma_fence *fence)
> +{
> +	struct dma_fence *ret_fence = NULL;
> +	mutex_lock(&sync_file->lock);
> +	if (sync_file->fence) {
> +		if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> +			dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> +		ret_fence = sync_file->fence;
> +	}

uabi semantics question: Should we wake up everyone when the fence gets
replaced? What's the khr semaphore expectation here?

Spec quote in the kernel-doc (if available) would be best ...
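
Should the answer be that waiters must observe the swap, the replace path
would presumably also need a wake-up; a sketch only, pending the spec
language:

    /* Sketch: notify pollers on replacement (whether this is wanted is
     * exactly the open question above). */
    struct dma_fence *old;

    mutex_lock(&sync_file->lock);
    old = sync_file->fence;
    sync_file->fence = fence ? dma_fence_get(fence) : NULL;
    wake_up_all(&sync_file->wq);   /* assumption: a swap counts as an event */
    mutex_unlock(&sync_file->lock);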

> +	if (fence)
> +		sync_file->fence = dma_fence_get(fence);
> +	else
> +		sync_file->fence = NULL;
> +	mutex_unlock(&sync_file->lock);
> +	return ret_fence;
> +}
> +EXPORT_SYMBOL(sync_file_replace_fence);
> +
>  static int sync_file_set_fence(struct sync_file *sync_file,
>  			       struct dma_fence **fences, int num_fences)
>  {
> @@ -292,8 +328,10 @@ static void sync_file_free(struct kref *kref)
>  	struct sync_file *sync_file = container_of(kref, struct sync_file,
>  						     kref);
>  
> -	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> -		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> +	if (sync_file->fence) {
> +		if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> +			dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> +	}
>  	dma_fence_put(sync_file->fence);
>  	kfree(sync_file);
>  }

I think you've missed _poll, won't that oops now?
-Daniel
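
The _poll concern: once replacement can leave sync_file->fence NULL, the
poll path quoted earlier dereferences a NULL pointer. A sketch of the
missing guard, mirroring the check this patch adds to the free path:

    /* Sketch: sync_file_poll() body with a NULL-fence guard. */
    unsigned int ret_val;

    mutex_lock(&sync_file->lock);
    if (sync_file->fence &&
        !test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
        if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
                                   fence_check_cb_func) < 0)
            wake_up_all(&sync_file->wq);
    }
    ret_val = (sync_file->fence && dma_fence_is_signaled(sync_file->fence)) ?
        POLLIN : 0;
    mutex_unlock(&sync_file->lock);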

> diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
> index 5aef17f..9511a54 100644
> --- a/include/linux/sync_file.h
> +++ b/include/linux/sync_file.h
> @@ -50,7 +50,10 @@ struct sync_file {
>  
>  #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
>  
> +struct sync_file *sync_file_alloc(void);
>  struct sync_file *sync_file_create(struct dma_fence *fence);
>  struct dma_fence *sync_file_get_fence(int fd);
> -
> +struct sync_file *sync_file_fdget(int fd);
> +struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
> +					  struct dma_fence *fence);
>  #endif /* _LINUX_SYNC_H */
> -- 
> 2.7.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [rfc] amdgpu/sync_file shared semaphores
  2017-03-14  0:50 [rfc] amdgpu/sync_file shared semaphores Dave Airlie
  2017-03-14  0:50 ` [PATCH 1/4] sync_file: add a mutex to protect fence and callback members Dave Airlie
       [not found] ` <20170314005054.7487-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-14  8:53 ` Daniel Vetter
       [not found]   ` <20170314085313.qmnnofqa6e6ozmwk-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  2 siblings, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-14  8:53 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel, amd-gfx

On Tue, Mar 14, 2017 at 10:50:49AM +1000, Dave Airlie wrote:
> This contains one libdrm patch and 4 kernel patches.
> 
> The goal is to implement the Vulkan KHR_external_semaphore_fd interface
> for permanent semaphore behaviour for radv.
> 
> This code tries to enhance sync file to add the required behaviour
> (which is mostly being able to replace the fence backing the sync file),
> it also introduces new API to amdgpu to create/destroy/export/import the
> sync_files which we store in an idr there, rather than fds.
> 
> There is a possibility we should share the amdgpu_sem object tracking
> code for other drivers, maybe we should move that to sync_file as well,
> but I'm open to suggestions for whether this is a useful approach for
> other drivers.

Yeah, makes sense. I couldn't google the spec and didn't bother to figure
out where my intel khronos login went, so didn't double-check precise
semantics, just dropped a question. Few more things on the actual
sync_file stuff itself.

Really big wish here is for some igts using the debug/testing fence stuff
we have in vgem so that we can validate the corner-cases of fence
replacement and make sure nothing falls over. Especially with all the rcu
dancing, sync_file/dma_fence have become non-trivial; exhaustive testing
is needed here imo.

Also, NULL sync_file->fence is probably what we want for the future fences
(which android wants to keep tilers well-fed by essentially looping the
entire render pipeline on itself), so this goes in the right direction
for sure. I think, but coffee kicked in already :-)
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
  2017-03-14  3:30           ` zhoucm1
       [not found]             ` <58C763E0.1-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-14  8:54             ` Daniel Vetter
  2017-03-14  9:20               ` Christian König
  1 sibling, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-14  8:54 UTC (permalink / raw)
  To: zhoucm1; +Cc: amd-gfx mailing list, dri-devel, Mao, David

On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
> 
> 
> On 2017年03月14日 10:52, Dave Airlie wrote:
> > On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
> > > Hi Dave,
> > > 
> > > Could you tell me why you create your new one patch? I remember I send out
> > > our the whole implementation, Why not directly review our patches?
> > This is patch review, I'm not sure what you are expecting in terms of
> > direct review.
> > 
> > The patches you sent out were reviewed by Christian, he stated he
> > thinks this should
> > use sync_file, I was interested to see if this was actually possible,
> > so I just adapted
> > the patches to check if it was possible to avoid adding a lot of amd
> > specific code
> > for something that isn't required to be. Hence why I've sent this as
> > an rfc, as I want
> > to see if others think using sync_file is a good or bad idea. We may
> > end up going
> > back to the non-sync_file based patches if this proves to be a bad
> > idea, so far it
> > doesn't look like one.
> > 
> > I also reviewed the patches and noted it shouldn't add the wait/signal
> > interfaces,
> > that it should use the chunks on command submission, so while I was in
> > there I re
> > wrote that as well.
> Yeah, then I'm ok with this, just our internal team has used this
> implementation, they find some gaps between yours and previous, they could
> need to change their's again, they are annoyance for this.

This is why you _must_ get anything you're doing discussed in upstream
first. Your internal teams simply do not have design authority on stuff
like that. And yes it takes forever to get formerly proprietary
internal-only teams to understand this, speaking from years of experience
implementing a proper upstream-design-first approach to feature development
here at intel.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
  2017-03-14  4:16               ` Dave Airlie
@ 2017-03-14  8:56                 ` Daniel Vetter
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Vetter @ 2017-03-14  8:56 UTC (permalink / raw)
  To: Dave Airlie; +Cc: amd-gfx mailing list, dri-devel, Mao, David

On Tue, Mar 14, 2017 at 02:16:11PM +1000, Dave Airlie wrote:
> On 14 March 2017 at 13:30, zhoucm1 <david1.zhou@amd.com> wrote:
> >
> >
> > On 2017年03月14日 10:52, Dave Airlie wrote:
> >>
> >> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
> >>>
> >>> Hi Dave,
> >>>
> >>> Could you tell me why you create your new one patch? I remember I send
> >>> out
> >>> our the whole implementation, Why not directly review our patches?
> >>
> >> This is patch review, I'm not sure what you are expecting in terms of
> >> direct review.
> >>
> >> The patches you sent out were reviewed by Christian, he stated he
> >> thinks this should
> >> use sync_file, I was interested to see if this was actually possible,
> >> so I just adapted
> >> the patches to check if it was possible to avoid adding a lot of amd
> >> specific code
> >> for something that isn't required to be. Hence why I've sent this as
> >> an rfc, as I want
> >> to see if others think using sync_file is a good or bad idea. We may
> >> end up going
> >> back to the non-sync_file based patches if this proves to be a bad
> >> idea, so far it
> >> doesn't look like one.
> >>
> >> I also reviewed the patches and noted it shouldn't add the wait/signal
> >> interfaces,
> >> that it should use the chunks on command submission, so while I was in
> >> there I re
> >> wrote that as well.
> >
> > Yeah, then I'm ok with this, just our internal team has used this
> > implementation, they find some gaps between yours and previous, they could
> > need to change their's again, they are annoyance for this.
> 
> This is unfortunate, but the more internal development done, the more
> this will happen,
> especially in areas where you might interact with the kernel. I'd
> suggest trying to
> develop stuff in the open much earlier (don't start anything in-house).
> 
> Now that we have an open vulkan driver it might be that most features
> the internal
> vulkan driver requires will eventually be wanted by us,

Yeah that's another aspect, radv is the open vulkan driver for amd hw,
which means it gets to drive uapi.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 2/4] sync_file: add replace and export some functionality
  2017-03-14  0:50   ` [PATCH 2/4] sync_file: add replace and export some functionality Dave Airlie
       [not found]     ` <20170314005054.7487-4-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-14  9:00     ` Chris Wilson
  1 sibling, 0 replies; 40+ messages in thread
From: Chris Wilson @ 2017-03-14  9:00 UTC (permalink / raw)
  To: Dave Airlie; +Cc: jason, dri-devel, amd-gfx

On Tue, Mar 14, 2017 at 10:50:52AM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> Using sync_file to back vulkan semaphores means need to replace
> the fence underlying the sync file. This replace function removes
> the callback, swaps the fence, and returns the old one. This
> also exports the alloc and fdget functionality for the semaphore
> wrapper code.

Did you think about encapsulating a reservation object?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
  2017-03-14  8:45     ` Daniel Vetter
@ 2017-03-14  9:13       ` Christian König
       [not found]         ` <2cba21f6-731b-d112-f882-bef66a9b96c9-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Christian König @ 2017-03-14  9:13 UTC (permalink / raw)
  To: Daniel Vetter, Dave Airlie; +Cc: amd-gfx, dri-devel

On 2017-03-14 09:45, Daniel Vetter wrote:
> On Tue, Mar 14, 2017 at 10:50:51AM +1000, Dave Airlie wrote:
>> From: Dave Airlie <airlied@redhat.com>
>>
>> This isn't needed currently, but to reuse sync file for Vulkan
>> permanent shared semaphore semantics, we need to be able to swap
>> the fence backing a sync file. This patch adds a mutex to the
>> sync file and uses to protect accesses to the fence and cb members.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
> We've gone to pretty great lengths to rcu-protect all the fence stuff, so
> that a peek-only is entirely lockless. Can we haz the same for this pls?

Yes, just wanted to suggest the same thing.

Basically you just need the following to retrieve the fence:

while (sync_file->fence && !(fence = fence_get_rcu(sync_file->fence)));

And then only taking the lock when replacing it.

> Yes I know it's probably going to be slightly nasty when you get around to
> implementing the replacement logic. For just normal fences we can probably
> get away with not doing an rcu dance when freeing it, since the
> refcounting should protect us already.

The only tricky thing I can see is the fence_callback structure in the 
sync file. And that can be handled while holding the lock in the replace 
function.

> But for the replacement you need to have an rcu-delayed fence_put on the
> old fences.

Freeing fences is RCU safe anyway, see the default implementation of 
fence_free().

Had to fix that in amdgpu and radeon because our private implementation 
wasn't RCU safe and we ran into problems because of that.

So at least that should already have been taken care of.

Christian.
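
Putting those pieces together, the writer side might look like this; a
sketch assuming the fence pointer gains an __rcu annotation (the posted
patch instead hands the old fence back to the caller):

    /* Sketch: replace path publishing the new fence via RCU. */
    struct dma_fence *old;

    mutex_lock(&sync_file->lock);
    old = rcu_dereference_protected(sync_file->fence,
                                    lockdep_is_held(&sync_file->lock));
    if (old && test_bit(POLL_ENABLED, &old->flags))
        dma_fence_remove_callback(old, &sync_file->cb);
    rcu_assign_pointer(sync_file->fence, fence ? dma_fence_get(fence) : NULL);
    mutex_unlock(&sync_file->lock);
    dma_fence_put(old);  /* fine here: default dma_fence_free() is RCU-delayed */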

> -Daniel
>> ---
>>   drivers/dma-buf/sync_file.c | 23 +++++++++++++++++++----
>>   include/linux/sync_file.h   |  3 +++
>>   2 files changed, 22 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
>> index 2321035..105f48c 100644
>> --- a/drivers/dma-buf/sync_file.c
>> +++ b/drivers/dma-buf/sync_file.c
>> @@ -47,6 +47,7 @@ static struct sync_file *sync_file_alloc(void)
>>   
>>   	INIT_LIST_HEAD(&sync_file->cb.node);
>>   
>> +	mutex_init(&sync_file->lock);
>>   	return sync_file;
>>   
>>   err:
>> @@ -204,10 +205,13 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
>>   	if (!sync_file)
>>   		return NULL;
>>   
>> +	mutex_lock(&a->lock);
>> +	mutex_lock(&b->lock);
>>   	a_fences = get_fences(a, &a_num_fences);
>>   	b_fences = get_fences(b, &b_num_fences);
>> -	if (a_num_fences > INT_MAX - b_num_fences)
>> -		return NULL;
>> +	if (a_num_fences > INT_MAX - b_num_fences) {
>> +		goto unlock;
>> +	}
>>   
>>   	num_fences = a_num_fences + b_num_fences;
>>   
>> @@ -268,11 +272,17 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
>>   		goto err;
>>   	}
>>   
>> +	mutex_unlock(&b->lock);
>> +	mutex_unlock(&a->lock);
>> +
>>   	strlcpy(sync_file->name, name, sizeof(sync_file->name));
>>   	return sync_file;
>>   
>>   err:
>>   	fput(sync_file->file);
>> +unlock:
>> +	mutex_unlock(&b->lock);
>> +	mutex_unlock(&a->lock);
>>   	return NULL;
>>   
>>   }
>> @@ -299,16 +309,20 @@ static int sync_file_release(struct inode *inode, struct file *file)
>>   static unsigned int sync_file_poll(struct file *file, poll_table *wait)
>>   {
>>   	struct sync_file *sync_file = file->private_data;
>> +	unsigned int ret_val;
>>   
>>   	poll_wait(file, &sync_file->wq, wait);
>>   
>> +	mutex_lock(&sync_file->lock);
>>   	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
>>   		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
>>   					   fence_check_cb_func) < 0)
>>   			wake_up_all(&sync_file->wq);
>>   	}
>> +	ret_val = dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
>> +	mutex_unlock(&sync_file->lock);
>>   
>> -	return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
>> +	return ret_val;
>>   }
>>   
>>   static long sync_file_ioctl_merge(struct sync_file *sync_file,
>> @@ -393,6 +407,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
>>   	if (info.flags || info.pad)
>>   		return -EINVAL;
>>   
>> +	mutex_lock(&sync_file->lock);
>>   	fences = get_fences(sync_file, &num_fences);
>>   
>>   	/*
>> @@ -433,7 +448,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
>>   
>>   out:
>>   	kfree(fence_info);
>> -
>> +	mutex_unlock(&sync_file->lock);
>>   	return ret;
>>   }
>>   
>> diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
>> index 3e3ab84..5aef17f 100644
>> --- a/include/linux/sync_file.h
>> +++ b/include/linux/sync_file.h
>> @@ -30,6 +30,7 @@
>>    * @wq:			wait queue for fence signaling
>>    * @fence:		fence with the fences in the sync_file
>>    * @cb:			fence callback information
>> + * @lock:               mutex to protect fence/cb - used for semaphores
>>    */
>>   struct sync_file {
>>   	struct file		*file;
>> @@ -43,6 +44,8 @@ struct sync_file {
>>   
>>   	struct dma_fence	*fence;
>>   	struct dma_fence_cb cb;
>> +	/* protects the fence pointer and cb */
>> +	struct mutex lock;
>>   };
>>   
>>   #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
>> -- 
>> 2.7.4
>>



* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
  2017-03-14  8:54             ` Daniel Vetter
@ 2017-03-14  9:20               ` Christian König
  2017-03-14 10:04                 ` zhoucm1
  0 siblings, 1 reply; 40+ messages in thread
From: Christian König @ 2017-03-14  9:20 UTC (permalink / raw)
  To: Daniel Vetter, zhoucm1; +Cc: dri-devel, amd-gfx mailing list, Mao, David

On 2017-03-14 09:54, Daniel Vetter wrote:
> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
>>
>> On 2017年03月14日 10:52, Dave Airlie wrote:
>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>> Hi Dave,
>>>>
>>>> Could you tell me why you create your new one patch? I remember I send out
>>>> our the whole implementation, Why not directly review our patches?
>>> This is patch review, I'm not sure what you are expecting in terms of
>>> direct review.
>>>
>>> The patches you sent out were reviewed by Christian, he stated he
>>> thinks this should
>>> use sync_file, I was interested to see if this was actually possible,
>>> so I just adapted
>>> the patches to check if it was possible to avoid adding a lot of amd
>>> specific code
>>> for something that isn't required to be. Hence why I've sent this as
>>> an rfc, as I want
>>> to see if others think using sync_file is a good or bad idea. We may
>>> end up going
>>> back to the non-sync_file based patches if this proves to be a bad
>>> idea, so far it
>>> doesn't look like one.
>>>
>>> I also reviewed the patches and noted it shouldn't add the wait/signal
>>> interfaces,
>>> that it should use the chunks on command submission, so while I was in
>>> there I re
>>> wrote that as well.
>> Yeah, then I'm ok with this, just our internal team has used this
>> implementation, they find some gaps between yours and previous, they could
>> need to change their's again, they are annoyance for this.
> This is why you _must_ get anything you're doing discussed in upstream
> first. Your internal teams simply do not have design authority on stuff
> like that. And yes it takes forever to get formerly proprietary
> internal-only teams to understand this, speaking from years of experience
> implement a proper upstream-design-first approach to feature development
> here at intel.

"internal teams simply do not have design authority on stuff like that"

Can I print that on a t-shirt and start to sell it?

Christian.

> -Daniel



* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
       [not found]         ` <2cba21f6-731b-d112-f882-bef66a9b96c9-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-14  9:29           ` Chris Wilson
       [not found]             ` <20170314092909.GE2118-aII6DKEyn0pWYbfKqPwjAkR8Iwp7RQ6xAL8bYrjMMd8@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Chris Wilson @ 2017-03-14  9:29 UTC (permalink / raw)
  To: Christian König
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Dave Airlie,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Daniel Vetter

On Tue, Mar 14, 2017 at 10:13:37AM +0100, Christian König wrote:
> Am 14.03.2017 um 09:45 schrieb Daniel Vetter:
> >On Tue, Mar 14, 2017 at 10:50:51AM +1000, Dave Airlie wrote:
> >>From: Dave Airlie <airlied@redhat.com>
> >>
> >>This isn't needed currently, but to reuse sync file for Vulkan
> >>permanent shared semaphore semantics, we need to be able to swap
> >>the fence backing a sync file. This patch adds a mutex to the
> >>sync file and uses to protect accesses to the fence and cb members.
> >>
> >>Signed-off-by: Dave Airlie <airlied@redhat.com>
> >We've gone to pretty great lengths to rcu-protect all the fence stuff, so
> >that a peek-only is entirely lockless. Can we haz the same for this pls?
> 
> Yes, just wanted to suggest the same thing.
> 
> Basically you just need the following to retrieve the fence:
> 
> while (sync_file->fence && !(fence = fence_get_rcu(sync_file->fence));

We even have a helper for that:

	fence = dma_fence_get_rcu_safe(&sync_file->fence);

(Still going to suggest using a reservation_object rather than an
exclusive-only implementation.)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
       [not found]             ` <20170314092909.GE2118-aII6DKEyn0pWYbfKqPwjAkR8Iwp7RQ6xAL8bYrjMMd8@public.gmane.org>
@ 2017-03-14  9:30               ` Christian König
  2017-03-15  0:47                 ` Dave Airlie
  0 siblings, 1 reply; 40+ messages in thread
From: Christian König @ 2017-03-14  9:30 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Dave Airlie,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2017-03-14 10:29, Chris Wilson wrote:
> On Tue, Mar 14, 2017 at 10:13:37AM +0100, Christian König wrote:
>> Am 14.03.2017 um 09:45 schrieb Daniel Vetter:
>>> On Tue, Mar 14, 2017 at 10:50:51AM +1000, Dave Airlie wrote:
>>>> From: Dave Airlie <airlied@redhat.com>
>>>>
>>>> This isn't needed currently, but to reuse sync file for Vulkan
>>>> permanent shared semaphore semantics, we need to be able to swap
>>>> the fence backing a sync file. This patch adds a mutex to the
>>>> sync file and uses to protect accesses to the fence and cb members.
>>>>
>>>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>>> We've gone to pretty great lengths to rcu-protect all the fence stuff, so
>>> that a peek-only is entirely lockless. Can we haz the same for this pls?
>> Yes, just wanted to suggest the same thing.
>>
>> Basically you just need the following to retrieve the fence:
>>
>> while (sync_file->fence && !(fence = fence_get_rcu(sync_file->fence));
> We even have a helper for that:
>
> 	fence = dma_fence_get_rcu_safe(&sync_file->fence);
>
> (Still going to suggest using a reservation_object rather than an
> exclusive-only implementation.)

Yeah, thought about that as well. But the reservation object doesn't 
seem to match the required userspace semantic.

E.g. you actually don't want more than one fence in it as far as I 
understand it.

Christian.

> -Chris
>


* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
  2017-03-14  9:20               ` Christian König
@ 2017-03-14 10:04                 ` zhoucm1
       [not found]                   ` <58C7C032.7060706-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: zhoucm1 @ 2017-03-14 10:04 UTC (permalink / raw)
  To: Christian König, Daniel Vetter
  Cc: dri-devel, amd-gfx mailing list, Mao, David



On 2017-03-14 17:20, Christian König wrote:
> Am 14.03.2017 um 09:54 schrieb Daniel Vetter:
>> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
>>>
>>> On 2017年03月14日 10:52, Dave Airlie wrote:
>>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>>> Hi Dave,
>>>>>
>>>>> Could you tell me why you create your new one patch? I remember I 
>>>>> send out
>>>>> our the whole implementation, Why not directly review our patches?
>>>> This is patch review, I'm not sure what you are expecting in terms of
>>>> direct review.
>>>>
>>>> The patches you sent out were reviewed by Christian, he stated he
>>>> thinks this should
>>>> use sync_file, I was interested to see if this was actually possible,
>>>> so I just adapted
>>>> the patches to check if it was possible to avoid adding a lot of amd
>>>> specific code
>>>> for something that isn't required to be. Hence why I've sent this as
>>>> an rfc, as I want
>>>> to see if others think using sync_file is a good or bad idea. We may
>>>> end up going
>>>> back to the non-sync_file based patches if this proves to be a bad
>>>> idea, so far it
>>>> doesn't look like one.
>>>>
>>>> I also reviewed the patches and noted it shouldn't add the wait/signal
>>>> interfaces,
>>>> that it should use the chunks on command submission, so while I was in
>>>> there I re
>>>> wrote that as well.
>>> Yeah, then I'm ok with this, just our internal team has used this
>>> implementation, they find some gaps between yours and previous, they 
>>> could
>>> need to change their's again, they are annoyance for this.
>> This is why you _must_ get anything you're doing discussed in upstream
>> first. Your internal teams simply do not have design authority on stuff
>> like that. And yes it takes forever to get formerly proprietary
>> internal-only teams to understand this, speaking from years of 
>> experience
>> implement a proper upstream-design-first approach to feature development
>> here at intel.
>
> "internal teams simply do not have design authority on stuff like that"
>
> Can I print that on a t-shirt and start to sell it?
I guess you cannot wear it to the office.. :)

David Zhou
>
> Christian.
>
>> -Daniel
>
>


* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]                   ` <58C7C032.7060706-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-14 17:45                     ` Harry Wentland
       [not found]                       ` <8dea3303-480d-c29a-22ec-42adf3dab4ce-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Harry Wentland @ 2017-03-14 17:45 UTC (permalink / raw)
  To: zhoucm1, Christian König, Daniel Vetter
  Cc: Dave Airlie, amd-gfx mailing list, dri-devel, Mao, David

On 2017-03-14 06:04 AM, zhoucm1 wrote:
>
>
> On 2017年03月14日 17:20, Christian König wrote:
>> Am 14.03.2017 um 09:54 schrieb Daniel Vetter:
>>> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
>>>>
>>>> On 2017年03月14日 10:52, Dave Airlie wrote:
>>>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>>>> Hi Dave,
>>>>>>
>>>>>> Could you tell me why you create your new one patch? I remember I
>>>>>> send out
>>>>>> our the whole implementation, Why not directly review our patches?
>>>>> This is patch review, I'm not sure what you are expecting in terms of
>>>>> direct review.
>>>>>
>>>>> The patches you sent out were reviewed by Christian, he stated he
>>>>> thinks this should
>>>>> use sync_file, I was interested to see if this was actually possible,
>>>>> so I just adapted
>>>>> the patches to check if it was possible to avoid adding a lot of amd
>>>>> specific code
>>>>> for something that isn't required to be. Hence why I've sent this as
>>>>> an rfc, as I want
>>>>> to see if others think using sync_file is a good or bad idea. We may
>>>>> end up going
>>>>> back to the non-sync_file based patches if this proves to be a bad
>>>>> idea, so far it
>>>>> doesn't look like one.
>>>>>
>>>>> I also reviewed the patches and noted it shouldn't add the wait/signal
>>>>> interfaces,
>>>>> that it should use the chunks on command submission, so while I was in
>>>>> there I re
>>>>> wrote that as well.
>>>> Yeah, then I'm ok with this, just our internal team has used this
>>>> implementation, they find some gaps between yours and previous, they
>>>> could
>>>> need to change their's again, they are annoyance for this.
>>> This is why you _must_ get anything you're doing discussed in upstream
>>> first. Your internal teams simply do not have design authority on stuff
>>> like that. And yes it takes forever to get formerly proprietary
>>> internal-only teams to understand this, speaking from years of
>>> experience
>>> implement a proper upstream-design-first approach to feature development
>>> here at intel.
>>
>> "internal teams simply do not have design authority on stuff like that"
>>
>> Can I print that on a t-shirt and start to sell it?
> I guess you cannot dress it to go to office..:)
>

I'd wear it to the office.

https://www.customink.com/lab?cid=hkp0-00ay-6vjg

Harry

> David Zhou
>>
>> Christian.
>>
>>> -Daniel
>>
>>
>

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]                       ` <8dea3303-480d-c29a-22ec-42adf3dab4ce-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-14 17:54                         ` Daniel Vetter
  2017-03-14 18:10                         ` Christian König
  1 sibling, 0 replies; 40+ messages in thread
From: Daniel Vetter @ 2017-03-14 17:54 UTC (permalink / raw)
  To: Harry Wentland
  Cc: zhoucm1, Mao, David, amd-gfx mailing list, Christian König,
	dri-devel, Dave Airlie

On Tue, Mar 14, 2017 at 6:45 PM, Harry Wentland <harry.wentland@amd.com> wrote:
>>> "internal teams simply do not have design authority on stuff like that"
>>>
>>> Can I print that on a t-shirt and start to sell it?
>>
>> I guess you cannot dress it to go to office..:)
>>
>
> I'd wear it to the office.
>
> https://www.customink.com/lab?cid=hkp0-00ay-6vjg

Can we have a picture of the entire amd team wearing these? :-)

Anyway, I approve.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]                       ` <8dea3303-480d-c29a-22ec-42adf3dab4ce-5C7GfCeVMHo@public.gmane.org>
  2017-03-14 17:54                         ` Daniel Vetter
@ 2017-03-14 18:10                         ` Christian König
  2017-03-15  0:01                           ` Marek Olšák
  1 sibling, 1 reply; 40+ messages in thread
From: Christian König @ 2017-03-14 18:10 UTC (permalink / raw)
  To: Harry Wentland, zhoucm1, Daniel Vetter
  Cc: Dave Airlie, amd-gfx mailing list, dri-devel, Mao, David

On 2017-03-14 18:45, Harry Wentland wrote:
> On 2017-03-14 06:04 AM, zhoucm1 wrote:
>>
>>
>> On 2017年03月14日 17:20, Christian König wrote:
>>> Am 14.03.2017 um 09:54 schrieb Daniel Vetter:
>>>> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
>>>>>
>>>>> On 2017年03月14日 10:52, Dave Airlie wrote:
>>>>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>>>>> Hi Dave,
>>>>>>>
>>>>>>> Could you tell me why you create your new one patch? I remember I
>>>>>>> send out
>>>>>>> our the whole implementation, Why not directly review our patches?
>>>>>> This is patch review, I'm not sure what you are expecting in 
>>>>>> terms of
>>>>>> direct review.
>>>>>>
>>>>>> The patches you sent out were reviewed by Christian, he stated he
>>>>>> thinks this should
>>>>>> use sync_file, I was interested to see if this was actually 
>>>>>> possible,
>>>>>> so I just adapted
>>>>>> the patches to check if it was possible to avoid adding a lot of amd
>>>>>> specific code
>>>>>> for something that isn't required to be. Hence why I've sent this as
>>>>>> an rfc, as I want
>>>>>> to see if others think using sync_file is a good or bad idea. We may
>>>>>> end up going
>>>>>> back to the non-sync_file based patches if this proves to be a bad
>>>>>> idea, so far it
>>>>>> doesn't look like one.
>>>>>>
>>>>>> I also reviewed the patches and noted it shouldn't add the 
>>>>>> wait/signal
>>>>>> interfaces,
>>>>>> that it should use the chunks on command submission, so while I 
>>>>>> was in
>>>>>> there I re
>>>>>> wrote that as well.
>>>>> Yeah, then I'm ok with this, just our internal team has used this
>>>>> implementation, they find some gaps between yours and previous, they
>>>>> could
>>>>> need to change their's again, they are annoyance for this.
>>>> This is why you _must_ get anything you're doing discussed in upstream
>>>> first. Your internal teams simply do not have design authority on 
>>>> stuff
>>>> like that. And yes it takes forever to get formerly proprietary
>>>> internal-only teams to understand this, speaking from years of
>>>> experience
>>>> implement a proper upstream-design-first approach to feature 
>>>> development
>>>> here at intel.
>>>
>>> "internal teams simply do not have design authority on stuff like that"
>>>
>>> Can I print that on a t-shirt and start to sell it?
>> I guess you cannot dress it to go to office..:)
>>
>
> I'd wear it to the office.
>
> https://www.customink.com/lab?cid=hkp0-00ay-6vjg

I'm only at an AMD office every few years, so that's probably not worth it.

Anyway it's really something we should keep in mind if we want to 
upstream things.

Christian.

>
> Harry
>
>> David Zhou
>>>
>>> Christian.
>>>
>>>> -Daniel
>>>
>>>
>>



* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
  2017-03-14 18:10                         ` Christian König
@ 2017-03-15  0:01                           ` Marek Olšák
       [not found]                             ` <CAAxE2A7xRWhkp-OC59iy2i91uj5mtVTHR0uHfDf745L-ixxHcw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Marek Olšák @ 2017-03-15  0:01 UTC (permalink / raw)
  To: Christian König; +Cc: Mao, David, dri-devel, amd-gfx mailing list

While it's nice that you are all having fun here, I don't think that's
the way to communicate this.

The truth is, if AMD had an open source driver using the semaphores
(e.g. Vulkan) and if the libdrm semaphore code was merged, Dave
wouldn't be able to change it, ever. If a dependent open source
project relies on some libdrm interface, that interface is set in
stone. So AMD wouldn't be able to change it either. Unfortunately,
such an open source project doesn't exist, so the community can assume
that this libdrm code is still in the "initial design phase". Dave has
an upper hand here, because he actually has an open source project
that will use this, while AMD doesn't (yet). However, AMD can still
negotiate some details here, i.e. do a proper review.

Marek


On Tue, Mar 14, 2017 at 7:10 PM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 14.03.2017 um 18:45 schrieb Harry Wentland:
>>
>> On 2017-03-14 06:04 AM, zhoucm1 wrote:
>>>
>>>
>>>
>>> On 2017年03月14日 17:20, Christian König wrote:
>>>>
>>>> Am 14.03.2017 um 09:54 schrieb Daniel Vetter:
>>>>>
>>>>> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
>>>>>>
>>>>>>
>>>>>> On 2017年03月14日 10:52, Dave Airlie wrote:
>>>>>>>
>>>>>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>>>>>>
>>>>>>>> Hi Dave,
>>>>>>>>
>>>>>>>> Could you tell me why you create your new one patch? I remember I
>>>>>>>> send out
>>>>>>>> our the whole implementation, Why not directly review our patches?
>>>>>>>
>>>>>>> This is patch review, I'm not sure what you are expecting in terms of
>>>>>>> direct review.
>>>>>>>
>>>>>>> The patches you sent out were reviewed by Christian, he stated he
>>>>>>> thinks this should
>>>>>>> use sync_file, I was interested to see if this was actually possible,
>>>>>>> so I just adapted
>>>>>>> the patches to check if it was possible to avoid adding a lot of amd
>>>>>>> specific code
>>>>>>> for something that isn't required to be. Hence why I've sent this as
>>>>>>> an rfc, as I want
>>>>>>> to see if others think using sync_file is a good or bad idea. We may
>>>>>>> end up going
>>>>>>> back to the non-sync_file based patches if this proves to be a bad
>>>>>>> idea, so far it
>>>>>>> doesn't look like one.
>>>>>>>
>>>>>>> I also reviewed the patches and noted it shouldn't add the
>>>>>>> wait/signal
>>>>>>> interfaces,
>>>>>>> that it should use the chunks on command submission, so while I was
>>>>>>> in
>>>>>>> there I re
>>>>>>> wrote that as well.
>>>>>>
>>>>>> Yeah, then I'm ok with this, just our internal team has used this
>>>>>> implementation, they find some gaps between yours and previous, they
>>>>>> could
>>>>>> need to change their's again, they are annoyance for this.
>>>>>
>>>>> This is why you _must_ get anything you're doing discussed in upstream
>>>>> first. Your internal teams simply do not have design authority on stuff
>>>>> like that. And yes it takes forever to get formerly proprietary
>>>>> internal-only teams to understand this, speaking from years of
>>>>> experience
>>>>> implement a proper upstream-design-first approach to feature
>>>>> development
>>>>> here at intel.
>>>>
>>>>
>>>> "internal teams simply do not have design authority on stuff like that"
>>>>
>>>> Can I print that on a t-shirt and start to sell it?
>>>
>>> I guess you cannot dress it to go to office..:)
>>>
>>
>> I'd wear it to the office.
>>
>> https://www.customink.com/lab?cid=hkp0-00ay-6vjg
>
>
> I'm only at an AMD office every few years, so that's probably not worth it.
>
> Anyway it's really something we should keep in mind if we want to upstream
> things.
>
> Christian.
>
>
>>
>> Harry
>>
>>> David Zhou
>>>>
>>>>
>>>> Christian.
>>>>
>>>>> -Daniel
>>>>
>>>>
>>>>
>>>

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
  2017-03-14  9:30               ` Christian König
@ 2017-03-15  0:47                 ` Dave Airlie
       [not found]                   ` <CAPM=9twwjgBirFDenUmnorDdOZNiWHuzdBxawCiU7p9uS1o0_Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Dave Airlie @ 2017-03-15  0:47 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel, amd-gfx mailing list

On 14 March 2017 at 19:30, Christian König <deathsimple@vodafone.de> wrote:
> Am 14.03.2017 um 10:29 schrieb Chris Wilson:
>>
>> On Tue, Mar 14, 2017 at 10:13:37AM +0100, Christian König wrote:
>>>
>>> Am 14.03.2017 um 09:45 schrieb Daniel Vetter:
>>>>
>>>> On Tue, Mar 14, 2017 at 10:50:51AM +1000, Dave Airlie wrote:
>>>>>
>>>>> From: Dave Airlie <airlied@redhat.com>
>>>>>
>>>>> This isn't needed currently, but to reuse sync file for Vulkan
>>>>> permanent shared semaphore semantics, we need to be able to swap
>>>>> the fence backing a sync file. This patch adds a mutex to the
>>>>> sync file and uses to protect accesses to the fence and cb members.
>>>>>
>>>>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>>>>
>>>> We've gone to pretty great lengths to rcu-protect all the fence stuff,
>>>> so
>>>> that a peek-only is entirely lockless. Can we haz the same for this pls?
>>>
>>> Yes, just wanted to suggest the same thing.
>>>
>>> Basically you just need the following to retrieve the fence:
>>>
>>> while (sync_file->fence && !(fence = fence_get_rcu(sync_file->fence));
>>
>> We even have a helper for that:
>>
>>         fence = dma_fence_get_rcu_safe(&sync_file->fence);
>>
>> (Still going to suggest using a reservation_object rather than an
>> exclusive-only implementation.)
>
>
> Yeah, thought about that as well. But the reservation object doesn't seem to
> match the required userspace semantic.
>
> E.g. you actually don't want more than one fence it in as far as I
> understand it.

I don't think a reservation object is what the vulkan semantics ask for.

Dave.

* Re: [rfc] amdgpu/sync_file shared semaphores
       [not found]   ` <20170314085313.qmnnofqa6e6ozmwk-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2017-03-15  1:09     ` Dave Airlie
       [not found]       ` <CAPM=9txSFTkz0y5hamBA7U7fu+X8x_wHG+X3xtWvm2PY4aCwsw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Dave Airlie @ 2017-03-15  1:09 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel, amd-gfx mailing list

On 14 March 2017 at 18:53, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Mar 14, 2017 at 10:50:49AM +1000, Dave Airlie wrote:
>> This contains one libdrm patch and 4 kernel patches.
>>
>> The goal is to implement the Vulkan KHR_external_semaphore_fd interface
>> for permanent semaphore behaviour for radv.
>>
>> This code tries to enhance sync file to add the required behaviour
>> (which is mostly being able to replace the fence backing the sync file),
>> it also introduces new API to amdgpu to create/destroy/export/import the
>> sync_files which we store in an idr there, rather than fds.
>>
>> There is a possibility we should share the amdgpu_sem object tracking
>> code for other drivers, maybe we should move that to sync_file as well,
>> but I'm open to suggestions for whether this is a useful approach for
>> other drivers.
>
> Yeah, makes sense. I couldn't google the spec and didn't bother to figure
> out where my intel khronos login went, so didn't double-check precise
> semantics, just dropped a question. Few more things on the actual
> sync_file stuff itself.
>
> Really big wish here is for some igts using the debug/testing fence stuff
> we have in vgem so that we can validate the corner-cases of fence
> replacement and make sure nothing falls over. Especially with all the rcu
> dancing sync_file/dma_fence have become non-trivial, exhaustive testing is
> needed here imo.

We'd have to add vgem-specific interfaces to trigger the replacement
path though, since the replacement path is only going to be used for the
semaphore semantics.

Suggestions on how to test better welcome!

>
> Also, NULL sync_file->fence is probably what we want for the future fences
> (which android wants to keep tilers well-fed by essentially looping the
> entire render pipeline on itself), so this goes into the right direction
> for sure. I think, but coffee kicked in already :-)

Dave.


* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members.
       [not found]                   ` <CAPM=9twwjgBirFDenUmnorDdOZNiWHuzdBxawCiU7p9uS1o0_Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-03-15  2:20                     ` Dave Airlie
  0 siblings, 0 replies; 40+ messages in thread
From: Dave Airlie @ 2017-03-15  2:20 UTC (permalink / raw)
  To: Christian König
  Cc: dri-devel, amd-gfx mailing list, Daniel Vetter, Chris Wilson

On 15 March 2017 at 10:47, Dave Airlie <airlied@gmail.com> wrote:
> On 14 March 2017 at 19:30, Christian König <deathsimple@vodafone.de> wrote:
>> On 14.03.2017 at 10:29, Chris Wilson wrote:
>>>
>>> On Tue, Mar 14, 2017 at 10:13:37AM +0100, Christian König wrote:
>>>>
>>>> On 14.03.2017 at 09:45, Daniel Vetter wrote:
>>>>>
>>>>> On Tue, Mar 14, 2017 at 10:50:51AM +1000, Dave Airlie wrote:
>>>>>>
>>>>>> From: Dave Airlie <airlied@redhat.com>
>>>>>>
>>>>>> This isn't needed currently, but to reuse sync file for Vulkan
>>>>>> permanent shared semaphore semantics, we need to be able to swap
>>>>>> the fence backing a sync file. This patch adds a mutex to the
>>>>>> sync file and uses it to protect accesses to the fence and cb members.
>>>>>>
>>>>>> Signed-off-by: Dave Airlie <airlied@redhat.com>
>>>>>
>>>>> We've gone to pretty great lengths to rcu-protect all the fence stuff,
>>>>> so
>>>>> that a peek-only is entirely lockless. Can we haz the same for this pls?
>>>>
>>>> Yes, just wanted to suggest the same thing.
>>>>
>>>> Basically you just need the following to retrieve the fence:
>>>>
>>>> while (sync_file->fence && !(fence = fence_get_rcu(sync_file->fence)));
>>>
>>> We even have a helper for that:
>>>
>>>         fence = dma_fence_get_rcu_safe(&sync_file->fence);
>>>
>>> (Still going to suggest using a reservation_object rather than an
>>> exclusive-only implementation.)
>>
>>
>> Yeah, thought about that as well. But the reservation object doesn't seem to
>> match the required userspace semantics.
>>
>> E.g. you actually don't want more than one fence in it, as far as I
>> understand it.
>
> I don't think a reservation object is what the vulkan semantics ask for.
>

I suppose a reservation object with a single exclusive fence is close enough;
I just wouldn't want to create one with non-exclusive fences on it.

Dave.


* Re: [PATCH 2/4] sync_file: add replace and export some functionality
       [not found]         ` <20170314084851.covfl7sjwn3yadh5-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2017-03-15  4:19           ` Dave Airlie
  2017-03-15  8:55             ` Daniel Vetter
  0 siblings, 1 reply; 40+ messages in thread
From: Dave Airlie @ 2017-03-15  4:19 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel, amd-gfx mailing list

>
> uabi semantics question: Should we wake up everyone when the fence gets
> replaced? What's the khr semaphore expectation here?

There are no real semantics for this case: you either wait on the semaphore and
replace its fence with NULL, or signal it, where you replace the fence with a new fence.

Nobody should be using the sync_fence interfaces to poll on these fence fds.

So not crashing for non-vulkan behaviour is what we need.

The spec doesn't know anything other than that this is an opaque fd;
it shouldn't be doing operations on it, except importing it.

>>  static int sync_file_set_fence(struct sync_file *sync_file,
>>                              struct dma_fence **fences, int num_fences)
>>  {
>> @@ -292,8 +328,10 @@ static void sync_file_free(struct kref *kref)
>>       struct sync_file *sync_file = container_of(kref, struct sync_file,
>>                                                    kref);
>>
>> -     if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
>> -             dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
>> +     if (sync_file->fence) {
>> +             if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
>> +                     dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
>> +     }
>>       dma_fence_put(sync_file->fence);
>>       kfree(sync_file);
>>  }
>
> I think you've missed _poll, won't that oops now?

I don't think it will; all the stuff done inside the poll handler is
protected by the mutex.
Or do you think we should hook the new fence if the old fence has poll enabled?

Dave.
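
(In terms of the sync_file_replace_fence() helper added in patch 2, the
wait/signal semantics described above condense to roughly the sketch below,
mirroring amdgpu_sem_lookup_and_sync()/_signal() from patch 4. The function
names sem_wait/sem_signal are made up here, and error handling is elided.)

/* wait: take the fence out, leaving the semaphore unsignalled */
static int sem_wait(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		    struct sync_file *sync_file)
{
	struct dma_fence *fence = sync_file_replace_fence(sync_file, NULL);
	int r = amdgpu_sync_fence(adev, sync, fence); /* NULL fence is a no-op */

	dma_fence_put(fence);
	return r;
}

/* signal: install the new CS fence, dropping whatever was there before */
static void sem_signal(struct sync_file *sync_file, struct dma_fence *fence)
{
	dma_fence_put(sync_file_replace_fence(sync_file, fence));
}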


* Re: [rfc] amdgpu/sync_file shared semaphores
       [not found]       ` <CAPM=9txSFTkz0y5hamBA7U7fu+X8x_wHG+X3xtWvm2PY4aCwsw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-03-15  8:47         ` Daniel Vetter
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Vetter @ 2017-03-15  8:47 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel, amd-gfx mailing list, Daniel Vetter

On Wed, Mar 15, 2017 at 11:09:39AM +1000, Dave Airlie wrote:
> On 14 March 2017 at 18:53, Daniel Vetter <daniel@ffwll.ch> wrote:
> > On Tue, Mar 14, 2017 at 10:50:49AM +1000, Dave Airlie wrote:
> >> This contains one libdrm patch and 4 kernel patches.
> >>
> >> The goal is to implement the Vulkan KHR_external_semaphore_fd interface
> >> for permanent semaphore behaviour for radv.
> >>
> >> This code tries to enhance sync file to add the required behaviour
> >> (which is mostly being able to replace the fence backing the sync file),
> >> it also introduces new API to amdgpu to create/destroy/export/import the
> >> sync_files which we store in an idr there, rather than fds.
> >>
> >> There is a possibility we should share the amdgpu_sem object tracking
> >> code for other drivers, maybe we should move that to sync_file as well,
> >> but I'm open to suggestions for whether this is a useful approach for
> >> other drivers.
> >
> > Yeah, makes sense. I couldn't google the spec and didn't bother to figure
> > out where my intel khronos login went, so didn't double-check precise
> > semantics, just dropped a question. Few more things on the actual
> > sync_file stuff itself.
> >
> > Really big wish here is for some igts using the debug/testing fence stuff
> > we have in vgem so that we can validate the corner-cases of fence
> > replacement and make sure nothing falls over. Especially with all the rcu
> > dancing sync_file/dma_fence have become non-trivial, exhaustive testing is
> > needed here imo.
> 
> We'd have to add vgem-specific interfaces to trigger the replacement
> path though, since the replacement path is only going to be used for the
> semaphore semantics.
> 
> Suggestions on how to test better welcome!

Yeah, a vgem semaphore replacement ioctl is what I had in mind. And I guess
if you don't get around to it, we will when we do the i915 side of this
:-)
-Daniel
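
(For concreteness, such an igt could look roughly like the skeleton below.
Everything vgem-side here is hypothetical: replace_fence() stands in for the
not-yet-existing vgem ioctl that swaps the fence backing a sync_file.)

#include <poll.h>
#include <assert.h>
#include <stdbool.h>

/* hypothetical wrapper around the proposed vgem replacement ioctl */
extern void replace_fence(int vgem_fd, int sync_fd, bool signalled);

static void test_poll_across_replace(int vgem_fd, int sync_fd)
{
	struct pollfd pfd = { .fd = sync_fd, .events = POLLIN };

	/* attach an unsignalled fence: poll must report "not ready" */
	replace_fence(vgem_fd, sync_fd, false);
	assert(poll(&pfd, 1, 0) == 0);

	/* swap in a signalled fence: poll must now report "ready" */
	replace_fence(vgem_fd, sync_fd, true);
	assert(poll(&pfd, 1, 0) == 1 && (pfd.revents & POLLIN));
}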

> 
> >
> > Also, NULL sync_file->fence is probably what we want for the future fences
> > (which android wants to keep tilers well-fed by essentially looping the
> > entire render pipeline on itself), so this goes into the right direction
> > for sure. I think, but coffee kicked in already :-)
> 
> Dave.

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]                             ` <CAAxE2A7xRWhkp-OC59iy2i91uj5mtVTHR0uHfDf745L-ixxHcw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-03-15  8:48                               ` Daniel Vetter
  2017-03-15  9:35                                 ` Christian König
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-15  8:48 UTC (permalink / raw)
  To: Marek Olšák
  Cc: zhoucm1, Mao, David, Harry Wentland, amd-gfx mailing list,
	Christian König, dri-devel, Daniel Vetter, Dave Airlie

On Wed, Mar 15, 2017 at 01:01:19AM +0100, Marek Olšák wrote:
> While it's nice that you are all having fun here, I don't think that's
> the way to communicate this.
> 
> The truth is, if AMD had an open source driver using the semaphores
> (e.g. Vulkan) and if the libdrm semaphore code was merged, Dave
> wouldn't be able to change it, ever. If a dependent open source
> project relies on some libdrm interface, that interface is set in
> stone. So AMD wouldn't be able to change it either. Unfortunately,
> such an open source project doesn't exist, so the community can assume
> that this libdrm code is still in the "initial design phase". Dave has
> the upper hand here, because he actually has an open source project
> that will use this, while AMD doesn't (yet). However, AMD can still
> negotiate some details here, i.e. do a proper review.

Fully agreed, that's why there was this "internal" qualifier. If you do
this internally, then it's not final; if you discuss it here on the m-l,
it actually matters. So drag your internal teams into the public, and it's
all fine.
-Daniel

> 
> Marek
> 
> 
> On Tue, Mar 14, 2017 at 7:10 PM, Christian König
> <deathsimple@vodafone.de> wrote:
> >> On 14.03.2017 at 18:45, Harry Wentland wrote:
> >>
> >> On 2017-03-14 06:04 AM, zhoucm1 wrote:
> >>>
> >>>
> >>>
> >>> On 2017-03-14 17:20, Christian König wrote:
> >>>>
> >>>> On 14.03.2017 at 09:54, Daniel Vetter wrote:
> >>>>>
> >>>>> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 2017-03-14 10:52, Dave Airlie wrote:
> >>>>>>>
> >>>>>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
> >>>>>>>>
> >>>>>>>> Hi Dave,
> >>>>>>>>
> >>>>>>>> Could you tell me why you created your own new patch? I remember I
> >>>>>>>> sent out
> >>>>>>>> our whole implementation. Why not directly review our patches?
> >>>>>>>
> >>>>>>> This is patch review, I'm not sure what you are expecting in terms of
> >>>>>>> direct review.
> >>>>>>>
> >>>>>>> The patches you sent out were reviewed by Christian, he stated he
> >>>>>>> thinks this should
> >>>>>>> use sync_file, I was interested to see if this was actually possible,
> >>>>>>> so I just adapted
> >>>>>>> the patches to check if it was possible to avoid adding a lot of amd
> >>>>>>> specific code
> >>>>>>> for something that isn't required to be. Hence why I've sent this as
> >>>>>>> an rfc, as I want
> >>>>>>> to see if others think using sync_file is a good or bad idea. We may
> >>>>>>> end up going
> >>>>>>> back to the non-sync_file based patches if this proves to be a bad
> >>>>>>> idea, so far it
> >>>>>>> doesn't look like one.
> >>>>>>>
> >>>>>>> I also reviewed the patches and noted it shouldn't add the
> >>>>>>> wait/signal
> >>>>>>> interfaces,
> >>>>>>> that it should use the chunks on command submission, so while I was
> >>>>>>> in there I rewrote that as well.
> >>>>>>
> >>>>>> Yeah, then I'm ok with this; it's just that our internal team has used
> >>>>>> this implementation, they found some gaps between yours and the previous
> >>>>>> one, and they may need to change theirs again, which is an annoyance
> >>>>>> for them.
> >>>>>
> >>>>> This is why you _must_ get anything you're doing discussed in upstream
> >>>>> first. Your internal teams simply do not have design authority on stuff
> >>>>> like that. And yes it takes forever to get formerly proprietary
> >>>>> internal-only teams to understand this, speaking from years of
> >>>>> experience implementing a proper upstream-design-first approach to
> >>>>> feature development here at intel.
> >>>>
> >>>>
> >>>> "internal teams simply do not have design authority on stuff like that"
> >>>>
> >>>> Can I print that on a t-shirt and start to sell it?
> >>>
> >>> I guess you cannot dress it to go to office..:)
> >>>
> >>
> >> I'd wear it to the office.
> >>
> >> https://www.customink.com/lab?cid=hkp0-00ay-6vjg
> >
> >
> > I'm only at an AMD office every few years, so that's probably not worth it.
> >
> > Anyway it's really something we should keep in mind if we want to upstream
> > things.
> >
> > Christian.
> >
> >
> >>
> >> Harry
> >>
> >>> David Zhou
> >>>>
> >>>>
> >>>> Christian.
> >>>>
> >>>>> -Daniel
> >>>>
> >>>>
> >>>>
> >>>

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


* Re: [PATCH 2/4] sync_file: add replace and export some functionality
  2017-03-15  4:19           ` Dave Airlie
@ 2017-03-15  8:55             ` Daniel Vetter
  2017-03-16  5:18               ` Dave Airlie
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-15  8:55 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel, amd-gfx mailing list

On Wed, Mar 15, 2017 at 02:19:16PM +1000, Dave Airlie wrote:
> >
> > uabi semantics question: Should we wake up everyone when the fence gets
> > replaced? What's the khr semaphore expectation here?
> 
> There are no real semantics for this case: you either wait on the semaphore and
> replace its fence with NULL, or signal it, where you replace the fence with a new fence.
> 
> Nobody should be using the sync_fence interfaces to poll on these fence fds.
> 
> So not crashing for non-vulkan behaviour is what we need.
> 
> The spec doesn't know anything other than that this is an opaque fd;
> it shouldn't be doing operations on it, except importing it.
> 
> >>  static int sync_file_set_fence(struct sync_file *sync_file,
> >>                              struct dma_fence **fences, int num_fences)
> >>  {
> >> @@ -292,8 +328,10 @@ static void sync_file_free(struct kref *kref)
> >>       struct sync_file *sync_file = container_of(kref, struct sync_file,
> >>                                                    kref);
> >>
> >> -     if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> >> -             dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> >> +     if (sync_file->fence) {
> >> +             if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> >> +                     dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> >> +     }
> >>       dma_fence_put(sync_file->fence);
> >>       kfree(sync_file);
> >>  }
> >
> > I think you've missed _poll, won't that oops now?
> 
> I don't think it will; all the stuff done inside the poll handler is
> protected by the mutex.
> Or do you think we should hook the new fence if the old fence has poll enabled?

Yeah, or at least wake them up somehow and restart it. Or well, at least
think through what would happen when it does, and whether we care. If vk says
you get undefined behaviour when you replace the fence of a semaphore when
the old one hasn't signalled yet, then we don't need to do anything.

But if they spec some behaviour, then poll returning "ready" way after
you've replaced the fence, when the new one is definitely not ready, is
rather confusing semantics. Or maybe that's exactly the semantics vulkan wants,
but then we need to be careful with restarts and stuff.

wrt the oops I think there's a possibility of a use-after-free: Thus far
the wait inherited the fence reference from the sync_file (since poll
stops before the file ref is gone), but now the fence could disappear
underneath it. add_callback itself doesn't grab a reference on its own.

So either we need synchronous replacement for poll or grab a reference
there. I think at least ... coffee not yet entirely working. In any case,
this kind of nasty corner case is perfect for some vgem testing.
-Daniel
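
(One way to close that hole, sketched below: have poll take its own fence
reference under the mutex from patch 1 instead of borrowing the sync_file's.
The field name "lock" is an assumption, and the POLL_ENABLED callback setup
is elided; the point is only the get/put pair around the signalled check.)

static unsigned int sync_file_poll(struct file *file, poll_table *wait)
{
	struct sync_file *sync_file = file->private_data;
	struct dma_fence *fence;
	unsigned int mask = 0;

	poll_wait(file, &sync_file->wq, wait);

	mutex_lock(&sync_file->lock);
	/* own reference: the fence can no longer vanish under the wait */
	fence = sync_file->fence ? dma_fence_get(sync_file->fence) : NULL;
	mutex_unlock(&sync_file->lock);

	if (fence) {
		if (dma_fence_is_signaled(fence))
			mask = POLLIN;
		dma_fence_put(fence);
	}

	return mask;
}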
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


* Re: [PATCH 4/4] amdgpu: use sync file for shared semaphores
       [not found]     ` <20170314005054.7487-6-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-15  9:01       ` Daniel Vetter
       [not found]         ` <20170315090129.fneo5gafrz2jec32-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  0 siblings, 1 reply; 40+ messages in thread
From: Daniel Vetter @ 2017-03-15  9:01 UTC (permalink / raw)
  To: Dave Airlie
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> This creates a new interface for amdgpu with ioctls to
> create/destroy/import and export shared semaphores using
> sem object backed by the sync_file code. The semaphores
> are not installed as fd (except for export), but rather
> like other driver internal objects in an idr. The idr
> holds the initial reference on the sync file.
> 
> The command submission interface is enhanced with two new
> chunks, one for semaphore waiting, one for semaphore signalling
> and just takes a list of handles for each.
> 
> This is based on work originally done by David Zhou at AMD,
> with input from Christian Konig on what things should look like.
> 
> NOTE: this interface addition needs a version bump to expose
> it to userspace.
> 
> Signed-off-by: Dave Airlie <airlied@redhat.com>
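
(For orientation, userspace would drive this interface roughly as in the
sketch below; drmCommandWriteRead() is the usual libdrm wrapper, everything
else comes from the uapi additions quoted further down, and error paths are
elided.)

#include <string.h>
#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static int sem_create_and_export(int fd, uint32_t *handle, int *sync_fd)
{
	union drm_amdgpu_sem args;
	int r;

	/* create a semaphore, getting back an idr handle */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_SEM_OP_CREATE_SEM;
	r = drmCommandWriteRead(fd, DRM_AMDGPU_SEM, &args, sizeof(args));
	if (r)
		return r;
	*handle = args.out.handle;

	/* export it as a sync_file fd for sharing across processes */
	memset(&args, 0, sizeof(args));
	args.in.op = AMDGPU_SEM_OP_EXPORT_SEM;
	args.in.handle = *handle;
	r = drmCommandWriteRead(fd, DRM_AMDGPU_SEM, &args, sizeof(args));
	if (r)
		return r;
	*sync_fd = args.out.fd;
	return 0;
}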

Another uapi corner case: Since you allow semaphores to be created before
they have a first fence attached, that essentially creates future fences.
I.e. sync_file fd with no fence yet attached. We want that anyway, but
> maybe not together with semaphores (since there are some more implications).

I think it'd be good to attach a dummy fence which already starts out as
signalled to sync_file->fence for semaphores. That way userspace can't go
evil, create a semaphore, then pass it to a driver which then promptly
oopses or worse because it assumes then when it has a sync_file, it also
has a fence. And the dummy fence could be globally shared, so not really
overhead.
-Daniel
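
(The globally shared, pre-signalled dummy fence could be as simple as the
sketch below; all names are made up, and one-time-init races are ignored.
enable_signaling can never be reached since the fence starts out signalled,
and .wait can use the stock helper.)

static DEFINE_SPINLOCK(dummy_fence_lock);

static const char *dummy_fence_name(struct dma_fence *fence)
{
	return "sem_dummy";
}

static bool dummy_fence_enable_signaling(struct dma_fence *fence)
{
	return false; /* unreachable: the fence is signalled at init */
}

static const struct dma_fence_ops dummy_fence_ops = {
	.get_driver_name = dummy_fence_name,
	.get_timeline_name = dummy_fence_name,
	.enable_signaling = dummy_fence_enable_signaling,
	.wait = dma_fence_default_wait,
};

static struct dma_fence dummy_fence;

static struct dma_fence *get_dummy_fence(void)
{
	if (!dummy_fence.ops) {
		dma_fence_init(&dummy_fence, &dummy_fence_ops,
			       &dummy_fence_lock,
			       dma_fence_context_alloc(1), 0);
		dma_fence_signal(&dummy_fence);
	}
	return dma_fence_get(&dummy_fence);
}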

> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
>  include/uapi/drm/amdgpu_drm.h           |  28 +++++
>  6 files changed, 322 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 2814aad..404bcba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>  	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>  	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>  	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
> +	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
>  
>  # add asic specific block
>  amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index c1b9135..4ed8811 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -702,6 +702,8 @@ struct amdgpu_fpriv {
>  	struct mutex		bo_list_lock;
>  	struct idr		bo_list_handles;
>  	struct amdgpu_ctx_mgr	ctx_mgr;
> +	spinlock_t		sem_handles_lock;
> +	struct idr		sem_handles;
>  };
>  
>  /*
> @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>  		       uint64_t addr, struct amdgpu_bo **bo);
>  int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>  
> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> +		     struct drm_file *filp);
> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> +				 uint32_t handle,
> +				 struct dma_fence *fence);
> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> +			       struct amdgpu_fpriv *fpriv,
> +			       struct amdgpu_sync *sync,
> +			       uint32_t handle);
>  #include "amdgpu_object.h"
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 4671432..80fc94b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>  			break;
>  
>  		case AMDGPU_CHUNK_ID_DEPENDENCIES:
> +		case AMDGPU_CHUNK_ID_SEM_WAIT:
> +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
>  			break;
>  
>  		default:
> @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
>  	return 0;
>  }
>  
> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
> +				       struct amdgpu_cs_parser *p,
> +				       struct amdgpu_cs_chunk *chunk)
> +{
> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> +	unsigned num_deps;
> +	int i, r;
> +	struct drm_amdgpu_cs_chunk_sem *deps;
> +
> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> +	num_deps = chunk->length_dw * 4 /
> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> +
> +	for (i = 0; i < num_deps; ++i) {
> +		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
> +					       deps[i].handle);
> +		if (r)
> +			return r;
> +	}
> +	return 0;
> +}
> +
>  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>  				  struct amdgpu_cs_parser *p)
>  {
> @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>  			r = amdgpu_process_fence_dep(adev, p, chunk);
>  			if (r)
>  				return r;
> +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
> +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
> +			if (r)
> +				return r;
>  		}
>  	}
>  
>  	return 0;
>  }
>  
> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
> +					 struct amdgpu_cs_chunk *chunk,
> +					 struct dma_fence *fence)
> +{
> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> +	unsigned num_deps;
> +	int i, r;
> +	struct drm_amdgpu_cs_chunk_sem *deps;
> +
> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> +	num_deps = chunk->length_dw * 4 /
> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> +
> +	for (i = 0; i < num_deps; ++i) {
> +		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
> +						 fence);
> +		if (r)
> +			return r;
> +	}
> +	return 0;
> +}
> +
> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> +{
> +	int i, r;
> +
> +	for (i = 0; i < p->nchunks; ++i) {
> +		struct amdgpu_cs_chunk *chunk;
> +
> +		chunk = &p->chunks[i];
> +
> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
> +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
> +			if (r)
> +				return r;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  			    union drm_amdgpu_cs *cs)
>  {
> @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  	trace_amdgpu_cs_ioctl(job);
>  	amd_sched_entity_push_job(&job->base);
>  
> -	return 0;
> +	return amdgpu_cs_post_dependencies(p);
>  }
>  
>  int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 61d94c7..013aed1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>  	mutex_init(&fpriv->bo_list_lock);
>  	idr_init(&fpriv->bo_list_handles);
>  
> +	spin_lock_init(&fpriv->sem_handles_lock);
> +	idr_init(&fpriv->sem_handles);
>  	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>  
>  	file_priv->driver_priv = fpriv;
> @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	struct amdgpu_device *adev = dev->dev_private;
>  	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>  	struct amdgpu_bo_list *list;
> +	struct amdgpu_sem *sem;
>  	int handle;
>  
>  	if (!fpriv)
> @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	idr_destroy(&fpriv->bo_list_handles);
>  	mutex_destroy(&fpriv->bo_list_lock);
>  
> +	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
> +		amdgpu_sem_destroy(fpriv, handle);
> +	idr_destroy(&fpriv->sem_handles);
> +
>  	kfree(fpriv);
>  	file_priv->driver_priv = NULL;
>  
> @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>  	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>  };
>  const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> new file mode 100644
> index 0000000..066520a
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2016 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Chunming Zhou <david1.zhou@amd.com>
> + */
> +#include <linux/file.h>
> +#include <linux/fs.h>
> +#include <linux/kernel.h>
> +#include <linux/poll.h>
> +#include <linux/seq_file.h>
> +#include <linux/export.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/anon_inodes.h>
> +#include <linux/sync_file.h>
> +#include "amdgpu.h"
> +#include <drm/drmP.h>
> +
> +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
> +{
> +	struct sync_file *sync_file;
> +
> +	spin_lock(&fpriv->sem_handles_lock);
> +
> +	/* Check if we currently have a reference on the object */
> +	sync_file = idr_find(&fpriv->sem_handles, handle);
> +
> +	spin_unlock(&fpriv->sem_handles_lock);
> +
> +	return sync_file;
> +}
> +
> +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
> +{
> +	struct sync_file *sync_file = sync_file_alloc();
> +	int ret;
> +
> +	if (!sync_file)
> +		return -ENOMEM;
> +
> +	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
> +
> +	/* we get a file reference and we use that in the idr. */
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&fpriv->sem_handles_lock);
> +
> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> +
> +	spin_unlock(&fpriv->sem_handles_lock);
> +	idr_preload_end();
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	*handle = ret;
> +	return 0;
> +}
> +
> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
> +{
> +	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return;
> +
> +	spin_lock(&fpriv->sem_handles_lock);
> +	idr_remove(&fpriv->sem_handles, handle);
> +	spin_unlock(&fpriv->sem_handles_lock);
> +
> +	fput(sync_file->file);
> +}
> +
> +
> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> +				 uint32_t handle,
> +				 struct dma_fence *fence)
> +{
> +	struct sync_file *sync_file;
> +	struct dma_fence *old_fence;
> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	old_fence = sync_file_replace_fence(sync_file, fence);
> +	dma_fence_put(old_fence);
> +	return 0;
> +}
> +
> +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
> +				       int fd, u32 *handle)
> +{
> +	struct sync_file *sync_file = sync_file_fdget(fd);
> +	int ret;
> +
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&fpriv->sem_handles_lock);
> +
> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> +
> +	spin_unlock(&fpriv->sem_handles_lock);
> +	idr_preload_end();
> +
> +	fput(sync_file->file);
> +	if (ret < 0)
> +		goto err_out;
> +
> +	*handle = ret;
> +	return 0;
> +err_out:
> +	return ret;
> +
> +}
> +
> +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
> +			     u32 handle, int *fd)
> +{
> +	struct sync_file *sync_file;
> +	int ret;
> +
> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	ret = get_unused_fd_flags(O_CLOEXEC);
> +	if (ret < 0)
> +		goto err_put_file;
> +
> +	fd_install(ret, sync_file->file);
> +
> +	*fd = ret;
> +	return 0;
> +err_put_file:
> +	return ret;
> +}
> +
> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> +			       struct amdgpu_fpriv *fpriv,
> +			       struct amdgpu_sync *sync,
> +			       uint32_t handle)
> +{
> +	int r;
> +	struct sync_file *sync_file;
> +	struct dma_fence *fence;
> +
> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	fence = sync_file_replace_fence(sync_file, NULL);
> +	r = amdgpu_sync_fence(adev, sync, fence);
> +	dma_fence_put(fence);
> +
> +	return r;
> +}
> +
> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> +		     struct drm_file *filp)
> +{
> +	union drm_amdgpu_sem *args = data;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	int r = 0;
> +
> +	switch (args->in.op) {
> +	case AMDGPU_SEM_OP_CREATE_SEM:
> +		r = amdgpu_sem_create(fpriv, &args->out.handle);
> +		break;
> +	case AMDGPU_SEM_OP_IMPORT_SEM:
> +		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
> +		break;
> +	case AMDGPU_SEM_OP_EXPORT_SEM:
> +		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
> +		break;
> +	case AMDGPU_SEM_OP_DESTROY_SEM:
> +		amdgpu_sem_destroy(fpriv, args->in.handle);
> +		break;
> +	default:
> +		r = -EINVAL;
> +		break;
> +	}
> +
> +	return r;
> +}
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 5797283..646b103 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -51,6 +51,7 @@ extern "C" {
>  #define DRM_AMDGPU_GEM_OP		0x10
>  #define DRM_AMDGPU_GEM_USERPTR		0x11
>  #define DRM_AMDGPU_WAIT_FENCES		0x12
> +#define DRM_AMDGPU_SEM                  0x13
>  
>  #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -65,6 +66,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
>  #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
>  #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
> +#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
>  
>  #define AMDGPU_GEM_DOMAIN_CPU		0x1
>  #define AMDGPU_GEM_DOMAIN_GTT		0x2
> @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
>  	struct drm_amdgpu_wait_fences_out out;
>  };
>  
> +#define AMDGPU_SEM_OP_CREATE_SEM 0
> +#define AMDGPU_SEM_OP_IMPORT_SEM 1
> +#define AMDGPU_SEM_OP_EXPORT_SEM 2
> +#define AMDGPU_SEM_OP_DESTROY_SEM 3
> +
> +struct drm_amdgpu_sem_in {
> +	__u32 op;
> +	__u32 handle;
> +};
> +
> +struct drm_amdgpu_sem_out {
> +	__u32 fd;
> +	__u32 handle;
> +};
> +
> +union drm_amdgpu_sem {
> +	struct drm_amdgpu_sem_in in;
> +	struct drm_amdgpu_sem_out out;
> +};
> +
>  #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
>  #define AMDGPU_GEM_OP_SET_PLACEMENT		1
>  
> @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
>  #define AMDGPU_CHUNK_ID_IB		0x01
>  #define AMDGPU_CHUNK_ID_FENCE		0x02
>  #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
> +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
>  
>  struct drm_amdgpu_cs_chunk {
>  	__u32		chunk_id;
> @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
>  	__u32 offset;
>  };
>  
> +struct drm_amdgpu_cs_chunk_sem {
> +	__u32 handle;
> +};
> +
>  struct drm_amdgpu_cs_chunk_data {
>  	union {
>  		struct drm_amdgpu_cs_chunk_ib		ib_data;
> -- 
> 2.7.4
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
  2017-03-15  8:48                               ` Daniel Vetter
@ 2017-03-15  9:35                                 ` Christian König
  0 siblings, 0 replies; 40+ messages in thread
From: Christian König @ 2017-03-15  9:35 UTC (permalink / raw)
  To: Daniel Vetter, Marek Olšák
  Cc: Mao, David, dri-devel, amd-gfx mailing list

On 15.03.2017 at 09:48, Daniel Vetter wrote:
> On Wed, Mar 15, 2017 at 01:01:19AM +0100, Marek Olšák wrote:
>> While it's nice that you are all having fun here, I don't think that's
>> the way to communicate this.
>>
>> The truth is, if AMD had an open source driver using the semaphores
>> (e.g. Vulkan) and if the libdrm semaphore code was merged, Dave
>> wouldn't be able to change it, ever. If a dependent open source
>> project relies on some libdrm interface, that interface is set in
>> stone. So AMD wouldn't be able to change it either. Unfortunately,
>> such an open source project doesn't exist, so the community can assume
>> that this libdrm code is still in the "initial design phase". Dave has
>> the upper hand here, because he actually has an open source project
>> that will use this, while AMD doesn't (yet). However, AMD can still
>> negotiate some details here, i.e. do a proper review.
> Fully agreed, that's why there was this "internal" qualifier. If you do
> this internally, then it's not final; if you discuss it here on the m-l,
> it actually matters. So drag your internal teams into the public, and it's
> all fine.

Unfortunately it's not done with that alone. You also need to raise
company-wide awareness of the changed needs.

For example the concept that changes aren't allowed to break older 
upstream components is completely new to most teams inside AMD.

It's a rather painful learning curve when you want to move projects from 
closed source to open source.

Christian.

> -Daniel
>
>> Marek
>>
>>
>> On Tue, Mar 14, 2017 at 7:10 PM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>> On 14.03.2017 at 18:45, Harry Wentland wrote:
>>>> On 2017-03-14 06:04 AM, zhoucm1 wrote:
>>>>>
>>>>>
>>>>>> On 2017-03-14 17:20, Christian König wrote:
>>>>>> Am 14.03.2017 um 09:54 schrieb Daniel Vetter:
>>>>>>> On Tue, Mar 14, 2017 at 11:30:40AM +0800, zhoucm1 wrote:
>>>>>>>>
>>>>>>>> On 2017-03-14 10:52, Dave Airlie wrote:
>>>>>>>>> On 14 March 2017 at 12:00, zhoucm1 <david1.zhou@amd.com> wrote:
>>>>>>>>>> Hi Dave,
>>>>>>>>>>
>>>>>>>>>> Could you tell me why you created your own new patch? I remember I
>>>>>>>>>> sent out
>>>>>>>>>> our whole implementation. Why not directly review our patches?
>>>>>>>>> This is patch review, I'm not sure what you are expecting in terms of
>>>>>>>>> direct review.
>>>>>>>>>
>>>>>>>>> The patches you sent out were reviewed by Christian, he stated he
>>>>>>>>> thinks this should
>>>>>>>>> use sync_file, I was interested to see if this was actually possible,
>>>>>>>>> so I just adapted
>>>>>>>>> the patches to check if it was possible to avoid adding a lot of amd
>>>>>>>>> specific code
>>>>>>>>> for something that isn't required to be. Hence why I've sent this as
>>>>>>>>> an rfc, as I want
>>>>>>>>> to see if others think using sync_file is a good or bad idea. We may
>>>>>>>>> end up going
>>>>>>>>> back to the non-sync_file based patches if this proves to be a bad
>>>>>>>>> idea, so far it
>>>>>>>>> doesn't look like one.
>>>>>>>>>
>>>>>>>>> I also reviewed the patches and noted it shouldn't add the
>>>>>>>>> wait/signal
>>>>>>>>> interfaces,
>>>>>>>>> that it should use the chunks on command submission, so while I was
>>>>>>>>> in there I rewrote that as well.
>>>>>>>> Yeah, then I'm ok with this; it's just that our internal team has used
>>>>>>>> this implementation, they found some gaps between yours and the previous
>>>>>>>> one, and they may need to change theirs again, which is an annoyance
>>>>>>>> for them.
>>>>>>> This is why you _must_ get anything you're doing discussed in upstream
>>>>>>> first. Your internal teams simply do not have design authority on stuff
>>>>>>> like that. And yes it takes forever to get formerly proprietary
>>>>>>> internal-only teams to understand this, speaking from years of
>>>>>>> experience implementing a proper upstream-design-first approach to
>>>>>>> feature development here at intel.
>>>>>>
>>>>>> "internal teams simply do not have design authority on stuff like that"
>>>>>>
>>>>>> Can I print that on a t-shirt and start to sell it?
>>>>> I guess you cannot dress it to go to office..:)
>>>>>
>>>> I'd wear it to the office.
>>>>
>>>> https://www.customink.com/lab?cid=hkp0-00ay-6vjg
>>>
>>> I'm only at an AMD office every few years, so that's probably not worth it.
>>>
>>> Anyway it's really something we should keep in mind if we want to upstream
>>> things.
>>>
>>> Christian.
>>>
>>>
>>>> Harry
>>>>
>>>>> David Zhou
>>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> -Daniel
>>>>>>
>>>>>>




* Re: [PATCH 4/4] amdgpu: use sync file for shared semaphores
       [not found]         ` <20170315090129.fneo5gafrz2jec32-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2017-03-15  9:43           ` Christian König
  2017-03-15  9:57             ` Daniel Vetter
  0 siblings, 1 reply; 40+ messages in thread
From: Christian König @ 2017-03-15  9:43 UTC (permalink / raw)
  To: Daniel Vetter, Dave Airlie
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 15.03.2017 at 10:01, Daniel Vetter wrote:
> On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote:
>> From: Dave Airlie <airlied@redhat.com>
>>
>> This creates a new interface for amdgpu with ioctls to
>> create/destroy/import and export shared semaphores using
>> sem object backed by the sync_file code. The semaphores
>> are not installed as fd (except for export), but rather
>> like other driver internal objects in an idr. The idr
>> holds the initial reference on the sync file.
>>
>> The command submission interface is enhanced with two new
>> chunks, one for semaphore waiting, one for semaphore signalling
>> and just takes a list of handles for each.
>>
>> This is based on work originally done by David Zhou at AMD,
>> with input from Christian Konig on what things should look like.
>>
>> NOTE: this interface addition needs a version bump to expose
>> it to userspace.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
> Another uapi corner case: Since you allow semaphores to be created before
> they have a first fence attached, that essentially creates future fences.
> I.e. sync_file fd with no fence yet attached. We want that anyway, but
> maybe not together with semaphores (since there are some more implications).
>
> I think it'd be good to attach a dummy fence which already starts out as
> signalled to sync_file->fence for semaphores. That way userspace can't go
> evil, create a semaphore, then pass it to a driver which then promptly
> oopses or worse because it assumes that when it has a sync_file, it also
> has a fence. And the dummy fence could be globally shared, so not really
> overhead.

Sounds fishy to me: what happens in case of a race condition where the
receiver sometimes waits on the dummy and sometimes on the real fence?

I would rather teach the users of sync_file to return a proper error 
code when somebody wants to wait on a sync_file which doesn't have a fence yet.

Christian.
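
(In sketch form, that alternative would make a fence-less wait fail instead
of substituting a dummy; sem_wait_strict is a made-up name, reusing the
replace helper from patch 2:)

static int sem_wait_strict(struct amdgpu_device *adev,
			   struct amdgpu_sync *sync,
			   struct sync_file *sync_file)
{
	struct dma_fence *fence = sync_file_replace_fence(sync_file, NULL);
	int r;

	if (!fence)
		return -EINVAL; /* nothing has signalled this semaphore yet */

	r = amdgpu_sync_fence(adev, sync, fence);
	dma_fence_put(fence);
	return r;
}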

> -Daniel
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
>>   include/uapi/drm/amdgpu_drm.h           |  28 +++++
>>   6 files changed, 322 insertions(+), 2 deletions(-)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 2814aad..404bcba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>   	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>   	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>>   	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>> -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
>> +	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
>>   
>>   # add asic specific block
>>   amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index c1b9135..4ed8811 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -702,6 +702,8 @@ struct amdgpu_fpriv {
>>   	struct mutex		bo_list_lock;
>>   	struct idr		bo_list_handles;
>>   	struct amdgpu_ctx_mgr	ctx_mgr;
>> +	spinlock_t		sem_handles_lock;
>> +	struct idr		sem_handles;
>>   };
>>   
>>   /*
>> @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>   		       uint64_t addr, struct amdgpu_bo **bo);
>>   int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>   
>> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
>> +		     struct drm_file *filp);
>> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
>> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
>> +				 uint32_t handle,
>> +				 struct dma_fence *fence);
>> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
>> +			       struct amdgpu_fpriv *fpriv,
>> +			       struct amdgpu_sync *sync,
>> +			       uint32_t handle);
>>   #include "amdgpu_object.h"
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 4671432..80fc94b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>>   			break;
>>   
>>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>> +		case AMDGPU_CHUNK_ID_SEM_WAIT:
>> +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
>>   			break;
>>   
>>   		default:
>> @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
>>   	return 0;
>>   }
>>   
>> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
>> +				       struct amdgpu_cs_parser *p,
>> +				       struct amdgpu_cs_chunk *chunk)
>> +{
>> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> +	unsigned num_deps;
>> +	int i, r;
>> +	struct drm_amdgpu_cs_chunk_sem *deps;
>> +
>> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>> +	num_deps = chunk->length_dw * 4 /
>> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
>> +
>> +	for (i = 0; i < num_deps; ++i) {
>> +		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
>> +					       deps[i].handle);
>> +		if (r)
>> +			return r;
>> +	}
>> +	return 0;
>> +}
>> +
>>   static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>   				  struct amdgpu_cs_parser *p)
>>   {
>> @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>   			r = amdgpu_process_fence_dep(adev, p, chunk);
>>   			if (r)
>>   				return r;
>> +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
>> +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
>> +			if (r)
>> +				return r;
>>   		}
>>   	}
>>   
>>   	return 0;
>>   }
>>   
>> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
>> +					 struct amdgpu_cs_chunk *chunk,
>> +					 struct dma_fence *fence)
>> +{
>> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> +	unsigned num_deps;
>> +	int i, r;
>> +	struct drm_amdgpu_cs_chunk_sem *deps;
>> +
>> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>> +	num_deps = chunk->length_dw * 4 /
>> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
>> +
>> +	for (i = 0; i < num_deps; ++i) {
>> +		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
>> +						 fence);
>> +		if (r)
>> +			return r;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>> +{
>> +	int i, r;
>> +
>> +	for (i = 0; i < p->nchunks; ++i) {
>> +		struct amdgpu_cs_chunk *chunk;
>> +
>> +		chunk = &p->chunks[i];
>> +
>> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
>> +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
>> +			if (r)
>> +				return r;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>>   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>   			    union drm_amdgpu_cs *cs)
>>   {
>> @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>   	trace_amdgpu_cs_ioctl(job);
>>   	amd_sched_entity_push_job(&job->base);
>>   
>> -	return 0;
>> +	return amdgpu_cs_post_dependencies(p);
>>   }
>>   
>>   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 61d94c7..013aed1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>>   	mutex_init(&fpriv->bo_list_lock);
>>   	idr_init(&fpriv->bo_list_handles);
>>   
>> +	spin_lock_init(&fpriv->sem_handles_lock);
>> +	idr_init(&fpriv->sem_handles);
>>   	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>>   
>>   	file_priv->driver_priv = fpriv;
>> @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	struct amdgpu_device *adev = dev->dev_private;
>>   	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>>   	struct amdgpu_bo_list *list;
>> +	struct amdgpu_sem *sem;
>>   	int handle;
>>   
>>   	if (!fpriv)
>> @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	idr_destroy(&fpriv->bo_list_handles);
>>   	mutex_destroy(&fpriv->bo_list_lock);
>>   
>> +	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
>> +		amdgpu_sem_destroy(fpriv, handle);
>> +	idr_destroy(&fpriv->sem_handles);
>> +
>>   	kfree(fpriv);
>>   	file_priv->driver_priv = NULL;
>>   
>> @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>   };
>>   const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
>> new file mode 100644
>> index 0000000..066520a
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
>> @@ -0,0 +1,204 @@
>> +/*
>> + * Copyright 2016 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Chunming Zhou <david1.zhou@amd.com>
>> + */
>> +#include <linux/file.h>
>> +#include <linux/fs.h>
>> +#include <linux/kernel.h>
>> +#include <linux/poll.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/export.h>
>> +#include <linux/sched.h>
>> +#include <linux/slab.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/anon_inodes.h>
>> +#include <linux/sync_file.h>
>> +#include "amdgpu.h"
>> +#include <drm/drmP.h>
>> +
>> +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
>> +{
>> +	struct sync_file *sync_file;
>> +
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +
>> +	/* Check if we currently have a reference on the object */
>> +	sync_file = idr_find(&fpriv->sem_handles, handle);
>> +
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +
>> +	return sync_file;
>> +}
>> +
>> +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
>> +{
>> +	struct sync_file *sync_file = sync_file_alloc();
>> +	int ret;
>> +
>> +	if (!sync_file)
>> +		return -ENOMEM;
>> +
>> +	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
>> +
>> +	/* we get a file reference and we use that in the idr. */
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +
>> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
>> +
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +	idr_preload_end();
>> +
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	*handle = ret;
>> +	return 0;
>> +}
>> +
>> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
>> +{
>> +	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return;
>> +
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +	idr_remove(&fpriv->sem_handles, handle);
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +
>> +	fput(sync_file->file);
>> +}
>> +
>> +
>> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
>> +				 uint32_t handle,
>> +				 struct dma_fence *fence)
>> +{
>> +	struct sync_file *sync_file;
>> +	struct dma_fence *old_fence;
>> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	old_fence = sync_file_replace_fence(sync_file, fence);
>> +	dma_fence_put(old_fence);
>> +	return 0;
>> +}
>> +
>> +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
>> +				       int fd, u32 *handle)
>> +{
>> +	struct sync_file *sync_file = sync_file_fdget(fd);
>> +	int ret;
>> +
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +
>> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
>> +
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +	idr_preload_end();
>> +
>> +	fput(sync_file->file);
>> +	if (ret < 0)
>> +		goto err_out;
>> +
>> +	*handle = ret;
>> +	return 0;
>> +err_out:
>> +	return ret;
>> +
>> +}
>> +
>> +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
>> +			     u32 handle, int *fd)
>> +{
>> +	struct sync_file *sync_file;
>> +	int ret;
>> +
>> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	ret = get_unused_fd_flags(O_CLOEXEC);
>> +	if (ret < 0)
>> +		goto err_put_file;
>> +
>> +	fd_install(ret, sync_file->file);
>> +
>> +	*fd = ret;
>> +	return 0;
>> +err_put_file:
>> +	return ret;
>> +}
>> +
>> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
>> +			       struct amdgpu_fpriv *fpriv,
>> +			       struct amdgpu_sync *sync,
>> +			       uint32_t handle)
>> +{
>> +	int r;
>> +	struct sync_file *sync_file;
>> +	struct dma_fence *fence;
>> +
>> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	fence = sync_file_replace_fence(sync_file, NULL);
>> +	r = amdgpu_sync_fence(adev, sync, fence);
>> +	dma_fence_put(fence);
>> +
>> +	return r;
>> +}
>> +
>> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
>> +		     struct drm_file *filp)
>> +{
>> +	union drm_amdgpu_sem *args = data;
>> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	int r = 0;
>> +
>> +	switch (args->in.op) {
>> +	case AMDGPU_SEM_OP_CREATE_SEM:
>> +		r = amdgpu_sem_create(fpriv, &args->out.handle);
>> +		break;
>> +	case AMDGPU_SEM_OP_IMPORT_SEM:
>> +		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
>> +		break;
>> +	case AMDGPU_SEM_OP_EXPORT_SEM:
>> +		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
>> +		break;
>> +	case AMDGPU_SEM_OP_DESTROY_SEM:
>> +		amdgpu_sem_destroy(fpriv, args->in.handle);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +		break;
>> +	}
>> +
>> +	return r;
>> +}
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 5797283..646b103 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -51,6 +51,7 @@ extern "C" {
>>   #define DRM_AMDGPU_GEM_OP		0x10
>>   #define DRM_AMDGPU_GEM_USERPTR		0x11
>>   #define DRM_AMDGPU_WAIT_FENCES		0x12
>> +#define DRM_AMDGPU_SEM                  0x13
>>   
>>   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>> @@ -65,6 +66,7 @@ extern "C" {
>>   #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
>>   #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
>>   #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
>> +#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
>>   
>>   #define AMDGPU_GEM_DOMAIN_CPU		0x1
>>   #define AMDGPU_GEM_DOMAIN_GTT		0x2
>> @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
>>   	struct drm_amdgpu_wait_fences_out out;
>>   };
>>   
>> +#define AMDGPU_SEM_OP_CREATE_SEM 0
>> +#define AMDGPU_SEM_OP_IMPORT_SEM 1
>> +#define AMDGPU_SEM_OP_EXPORT_SEM 2
>> +#define AMDGPU_SEM_OP_DESTROY_SEM 3
>> +
>> +struct drm_amdgpu_sem_in {
>> +	__u32 op;
>> +	__u32 handle;
>> +};
>> +
>> +struct drm_amdgpu_sem_out {
>> +	__u32 fd;
>> +	__u32 handle;
>> +};
>> +
>> +union drm_amdgpu_sem {
>> +	struct drm_amdgpu_sem_in in;
>> +	struct drm_amdgpu_sem_out out;
>> +};
>> +
>>   #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
>>   #define AMDGPU_GEM_OP_SET_PLACEMENT		1
>>   
>> @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
>>   #define AMDGPU_CHUNK_ID_IB		0x01
>>   #define AMDGPU_CHUNK_ID_FENCE		0x02
>>   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>> +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
>> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
>>   
>>   struct drm_amdgpu_cs_chunk {
>>   	__u32		chunk_id;
>> @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
>>   	__u32 offset;
>>   };
>>   
>> +struct drm_amdgpu_cs_chunk_sem {
>> +	__u32 handle;
>> +};
>> +
>>   struct drm_amdgpu_cs_chunk_data {
>>   	union {
>>   		struct drm_amdgpu_cs_chunk_ib		ib_data;
>> -- 
>> 2.7.4
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 4/4] amdgpu: use sync file for shared semaphores
  2017-03-15  9:43           ` Christian König
@ 2017-03-15  9:57             ` Daniel Vetter
  0 siblings, 0 replies; 40+ messages in thread
From: Daniel Vetter @ 2017-03-15  9:57 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, dri-devel

On Wed, Mar 15, 2017 at 10:43:01AM +0100, Christian König wrote:
> Am 15.03.2017 um 10:01 schrieb Daniel Vetter:
> > On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote:
> > > From: Dave Airlie <airlied@redhat.com>
> > > 
> > > This creates a new interface for amdgpu with ioctls to
> > > create/destroy/import and export shared semaphores, using
> > > a sem object backed by the sync_file code. The semaphores
> > > are not installed as fds (except on export), but are rather
> > > tracked like other driver-internal objects in an idr. The idr
> > > holds the initial reference on the sync file.
> > > 
> > > The command submission interface is enhanced with two new
> > > chunks, one for semaphore waiting, one for semaphore signalling
> > > and just takes a list of handles for each.
> > > 
> > > This is based on work originally done by David Zhou at AMD,
> > > with input from Christian Konig on what things should look like.
> > > 
> > > NOTE: this interface addition needs a version bump to expose
> > > it to userspace.
> > > 
> > > Signed-off-by: Dave Airlie <airlied@redhat.com>
> > Another uapi corner case: Since you allow semaphores to be created before
> > they have a first fence attached, that essentially creates future fences.
> > I.e. a sync_file fd with no fence attached yet. We want that anyway, but
> > maybe not together with semaphores (since there are some more implications).
> > 
> > I think it'd be good to attach a dummy fence which already starts out as
> > signalled to sync_file->fence for semaphores. That way userspace can't go
> > evil, create a semaphore, then pass it to a driver which then promptly
> > oopses or worse because it assumes that when it has a sync_file, it also
> > has a fence. And the dummy fence could be globally shared, so not really
> > overhead.
> 
> Sounds fishy to me: what happens in the case of a race condition, where the
> receiver sometimes waits on the dummy and sometimes on the real fence?
> 
> I would rather teach the users of sync_file to return a proper error code
> when you want to wait on a sync_file which doesn't have a fence yet.
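
A minimal sketch of that alternative, assuming waiters fetch the payload
with sync_file_get_fence() and simply refuse a fence-less semaphore:

	struct dma_fence *fence = sync_file_get_fence(fd);
	if (!fence)
		return -EINVAL;	/* semaphore has no payload yet */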

Races in userspace are races in userspace :-) And it's only a problem
initially when you use a semaphore for the first time. After that you
still have the same race, but because there's always a fence attached,
your userspace won't ever notice the failure. It will simply complete
immediately (because most likely the old fence has completed already).

And it also doesn't mesh with the rough future fence plan, where the idea
is that sync_file_get_fence blocks until the fence is there, and only
drivers who can handle the risk of a fence loop (through hangcheck and
hang recovery) will use a __get_fence function to peek at the future
fence. I think the assumption that a sync_file always comes with a fence
is a good principle for code robustness in the kernel.
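
To make the dummy-fence idea concrete, here is a rough sketch (an
illustration only, not code from this series; the ops table, helper
names, and one-time init are all assumptions):

	#include <linux/dma-fence.h>

	static DEFINE_SPINLOCK(stub_fence_lock);
	static struct dma_fence stub_fence;

	/* one name serves as both driver and timeline name */
	static const char *stub_fence_get_name(struct dma_fence *fence)
	{
		return "stub";
	}

	/* never called: the fence is signalled before anyone sees it */
	static bool stub_fence_enable_signaling(struct dma_fence *fence)
	{
		return false;
	}

	static const struct dma_fence_ops stub_fence_ops = {
		.get_driver_name = stub_fence_get_name,
		.get_timeline_name = stub_fence_get_name,
		.enable_signaling = stub_fence_enable_signaling,
		.wait = dma_fence_default_wait,
	};

	/* run once at init; the reference taken by dma_fence_init() is
	 * intentionally never dropped, so the static fence stays alive.
	 * Semaphore creation would then attach &stub_fence instead of
	 * leaving the sync_file's fence NULL. */
	static void stub_fence_init(void)
	{
		dma_fence_init(&stub_fence, &stub_fence_ops,
			       &stub_fence_lock,
			       dma_fence_context_alloc(1), 0);
		dma_fence_signal(&stub_fence);
	}

With that in place, looking at a freshly created semaphore would yield an
already-signalled fence rather than NULL.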

If you really want the userspace debugging, we can make it a module
option, or even better, sprinkle a pile of tracepoints all over fences to
make it easier to watch. But when the tradeoff is between userspace
debugging and kernel robustness, I think we need to go with kernel
robustness, for security reasons and all that.
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
> > >   include/uapi/drm/amdgpu_drm.h           |  28 +++++
> > >   6 files changed, 322 insertions(+), 2 deletions(-)
> > >   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> > > index 2814aad..404bcba 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> > > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> > > @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
> > >   	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
> > >   	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
> > >   	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> > > -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
> > > +	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
> > >   # add asic specific block
> > >   amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > index c1b9135..4ed8811 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > @@ -702,6 +702,8 @@ struct amdgpu_fpriv {
> > >   	struct mutex		bo_list_lock;
> > >   	struct idr		bo_list_handles;
> > >   	struct amdgpu_ctx_mgr	ctx_mgr;
> > > +	spinlock_t		sem_handles_lock;
> > > +	struct idr		sem_handles;
> > >   };
> > >   /*
> > > @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
> > >   		       uint64_t addr, struct amdgpu_bo **bo);
> > >   int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
> > > +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> > > +		     struct drm_file *filp);
> > > +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
> > > +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> > > +				 uint32_t handle,
> > > +				 struct dma_fence *fence);
> > > +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> > > +			       struct amdgpu_fpriv *fpriv,
> > > +			       struct amdgpu_sync *sync,
> > > +			       uint32_t handle);
> > >   #include "amdgpu_object.h"
> > >   #endif
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > index 4671432..80fc94b 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> > >   			break;
> > >   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
> > > +		case AMDGPU_CHUNK_ID_SEM_WAIT:
> > > +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
> > >   			break;
> > >   		default:
> > > @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
> > >   	return 0;
> > >   }
> > > +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
> > > +				       struct amdgpu_cs_parser *p,
> > > +				       struct amdgpu_cs_chunk *chunk)
> > > +{
> > > +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> > > +	unsigned num_deps;
> > > +	int i, r;
> > > +	struct drm_amdgpu_cs_chunk_sem *deps;
> > > +
> > > +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> > > +	num_deps = chunk->length_dw * 4 /
> > > +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> > > +
> > > +	for (i = 0; i < num_deps; ++i) {
> > > +		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
> > > +					       deps[i].handle);
> > > +		if (r)
> > > +			return r;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > >   static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
> > >   				  struct amdgpu_cs_parser *p)
> > >   {
> > > @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
> > >   			r = amdgpu_process_fence_dep(adev, p, chunk);
> > >   			if (r)
> > >   				return r;
> > > +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
> > > +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
> > > +			if (r)
> > > +				return r;
> > >   		}
> > >   	}
> > >   	return 0;
> > >   }
> > > +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
> > > +					 struct amdgpu_cs_chunk *chunk,
> > > +					 struct dma_fence *fence)
> > > +{
> > > +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> > > +	unsigned num_deps;
> > > +	int i, r;
> > > +	struct drm_amdgpu_cs_chunk_sem *deps;
> > > +
> > > +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> > > +	num_deps = chunk->length_dw * 4 /
> > > +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> > > +
> > > +	for (i = 0; i < num_deps; ++i) {
> > > +		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
> > > +						 fence);
> > > +		if (r)
> > > +			return r;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> > > +{
> > > +	int i, r;
> > > +
> > > +	for (i = 0; i < p->nchunks; ++i) {
> > > +		struct amdgpu_cs_chunk *chunk;
> > > +
> > > +		chunk = &p->chunks[i];
> > > +
> > > +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
> > > +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
> > > +			if (r)
> > > +				return r;
> > > +		}
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > >   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> > >   			    union drm_amdgpu_cs *cs)
> > >   {
> > > @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> > >   	trace_amdgpu_cs_ioctl(job);
> > >   	amd_sched_entity_push_job(&job->base);
> > > -	return 0;
> > > +	return amdgpu_cs_post_dependencies(p);
> > >   }
> > >   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index 61d94c7..013aed1 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
> > >   	mutex_init(&fpriv->bo_list_lock);
> > >   	idr_init(&fpriv->bo_list_handles);
> > > +	spin_lock_init(&fpriv->sem_handles_lock);
> > > +	idr_init(&fpriv->sem_handles);
> > >   	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
> > >   	file_priv->driver_priv = fpriv;
> > > @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> > >   	struct amdgpu_device *adev = dev->dev_private;
> > >   	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
> > >   	struct amdgpu_bo_list *list;
> > > +	struct sync_file *sync_file;
> > >   	int handle;
> > >   	if (!fpriv)
> > > @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> > >   	idr_destroy(&fpriv->bo_list_handles);
> > >   	mutex_destroy(&fpriv->bo_list_lock);
> > > +	idr_for_each_entry(&fpriv->sem_handles, sync_file, handle)
> > > +		amdgpu_sem_destroy(fpriv, handle);
> > > +	idr_destroy(&fpriv->sem_handles);
> > > +
> > >   	kfree(fpriv);
> > >   	file_priv->driver_priv = NULL;
> > > @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> > >   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > >   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > >   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > > +	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > >   };
> > >   const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> > > new file mode 100644
> > > index 0000000..066520a
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> > > @@ -0,0 +1,204 @@
> > > +/*
> > > + * Copyright 2016 Advanced Micro Devices, Inc.
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person obtaining a
> > > + * copy of this software and associated documentation files (the "Software"),
> > > + * to deal in the Software without restriction, including without limitation
> > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > > + * and/or sell copies of the Software, and to permit persons to whom the
> > > + * Software is furnished to do so, subject to the following conditions:
> > > + *
> > > + * The above copyright notice and this permission notice shall be included in
> > > + * all copies or substantial portions of the Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > > + * OTHER DEALINGS IN THE SOFTWARE.
> > > + *
> > > + * Authors:
> > > + *    Chunming Zhou <david1.zhou@amd.com>
> > > + */
> > > +#include <linux/file.h>
> > > +#include <linux/fs.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/poll.h>
> > > +#include <linux/seq_file.h>
> > > +#include <linux/export.h>
> > > +#include <linux/sched.h>
> > > +#include <linux/slab.h>
> > > +#include <linux/uaccess.h>
> > > +#include <linux/anon_inodes.h>
> > > +#include <linux/sync_file.h>
> > > +#include "amdgpu.h"
> > > +#include <drm/drmP.h>
> > > +
> > > +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
> > > +{
> > > +	struct sync_file *sync_file;
> > > +
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +
> > > +	/* Check if we currently have a reference on the object */
> > > +	sync_file = idr_find(&fpriv->sem_handles, handle);
> > > +
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +
> > > +	return sync_file;
> > > +}
> > > +
> > > +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
> > > +{
> > > +	struct sync_file *sync_file = sync_file_alloc();
> > > +	int ret;
> > > +
> > > +	if (!sync_file)
> > > +		return -ENOMEM;
> > > +
> > > +	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
> > > +
> > > +	/* we get a file reference and we use that in the idr. */
> > > +	idr_preload(GFP_KERNEL);
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +
> > > +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> > > +
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +	idr_preload_end();
> > > +
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > > +	*handle = ret;
> > > +	return 0;
> > > +}
> > > +
> > > +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
> > > +{
> > > +	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return;
> > > +
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +	idr_remove(&fpriv->sem_handles, handle);
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +
> > > +	fput(sync_file->file);
> > > +}
> > > +
> > > +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> > > +				 uint32_t handle,
> > > +				 struct dma_fence *fence)
> > > +{
> > > +	struct sync_file *sync_file;
> > > +	struct dma_fence *old_fence;
> > > +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	old_fence = sync_file_replace_fence(sync_file, fence);
> > > +	dma_fence_put(old_fence);
> > > +	return 0;
> > > +}
> > > +
> > > +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
> > > +				       int fd, u32 *handle)
> > > +{
> > > +	struct sync_file *sync_file = sync_file_fdget(fd);
> > > +	int ret;
> > > +
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	idr_preload(GFP_KERNEL);
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +
> > > +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> > > +
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +	idr_preload_end();
> > > +
> > > +	if (ret < 0) {
> > > +		/* drop the reference taken by sync_file_fdget() */
> > > +		fput(sync_file->file);
> > > +		return ret;
> > > +	}
> > > +
> > > +	/* the idr now owns the sync_file_fdget() reference */
> > > +	*handle = ret;
> > > +	return 0;
> > > +}
> > > +
> > > +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
> > > +			     u32 handle, int *fd)
> > > +{
> > > +	struct sync_file *sync_file;
> > > +	int ret;
> > > +
> > > +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	ret = get_unused_fd_flags(O_CLOEXEC);
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > > +	/* fd_install() consumes a file reference, so take one for the
> > > +	 * new fd and leave the idr's reference alone */
> > > +	get_file(sync_file->file);
> > > +	fd_install(ret, sync_file->file);
> > > +
> > > +	*fd = ret;
> > > +	return 0;
> > > +}
> > > +
> > > +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> > > +			       struct amdgpu_fpriv *fpriv,
> > > +			       struct amdgpu_sync *sync,
> > > +			       uint32_t handle)
> > > +{
> > > +	int r;
> > > +	struct sync_file *sync_file;
> > > +	struct dma_fence *fence;
> > > +
> > > +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	fence = sync_file_replace_fence(sync_file, NULL);
> > > +	r = amdgpu_sync_fence(adev, sync, fence);
> > > +	dma_fence_put(fence);
> > > +
> > > +	return r;
> > > +}
> > > +
> > > +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> > > +		     struct drm_file *filp)
> > > +{
> > > +	union drm_amdgpu_sem *args = data;
> > > +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > > +	int r = 0;
> > > +
> > > +	switch (args->in.op) {
> > > +	case AMDGPU_SEM_OP_CREATE_SEM:
> > > +		r = amdgpu_sem_create(fpriv, &args->out.handle);
> > > +		break;
> > > +	case AMDGPU_SEM_OP_IMPORT_SEM:
> > > +		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
> > > +		break;
> > > +	case AMDGPU_SEM_OP_EXPORT_SEM:
> > > +		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
> > > +		break;
> > > +	case AMDGPU_SEM_OP_DESTROY_SEM:
> > > +		amdgpu_sem_destroy(fpriv, args->in.handle);
> > > +		break;
> > > +	default:
> > > +		r = -EINVAL;
> > > +		break;
> > > +	}
> > > +
> > > +	return r;
> > > +}
> > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > > index 5797283..646b103 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -51,6 +51,7 @@ extern "C" {
> > >   #define DRM_AMDGPU_GEM_OP		0x10
> > >   #define DRM_AMDGPU_GEM_USERPTR		0x11
> > >   #define DRM_AMDGPU_WAIT_FENCES		0x12
> > > +#define DRM_AMDGPU_SEM                  0x13
> > >   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> > >   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > > @@ -65,6 +66,7 @@ extern "C" {
> > >   #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
> > >   #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
> > >   #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
> > > +#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
> > >   #define AMDGPU_GEM_DOMAIN_CPU		0x1
> > >   #define AMDGPU_GEM_DOMAIN_GTT		0x2
> > > @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
> > >   	struct drm_amdgpu_wait_fences_out out;
> > >   };
> > > +#define AMDGPU_SEM_OP_CREATE_SEM 0
> > > +#define AMDGPU_SEM_OP_IMPORT_SEM 1
> > > +#define AMDGPU_SEM_OP_EXPORT_SEM 2
> > > +#define AMDGPU_SEM_OP_DESTROY_SEM 3
> > > +
> > > +struct drm_amdgpu_sem_in {
> > > +	__u32 op;
> > > +	__u32 handle;
> > > +};
> > > +
> > > +struct drm_amdgpu_sem_out {
> > > +	__u32 fd;
> > > +	__u32 handle;
> > > +};
> > > +
> > > +union drm_amdgpu_sem {
> > > +	struct drm_amdgpu_sem_in in;
> > > +	struct drm_amdgpu_sem_out out;
> > > +};
> > > +
> > >   #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
> > >   #define AMDGPU_GEM_OP_SET_PLACEMENT		1
> > > @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
> > >   #define AMDGPU_CHUNK_ID_IB		0x01
> > >   #define AMDGPU_CHUNK_ID_FENCE		0x02
> > >   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
> > > +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
> > > +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
> > >   struct drm_amdgpu_cs_chunk {
> > >   	__u32		chunk_id;
> > > @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
> > >   	__u32 offset;
> > >   };
> > > +struct drm_amdgpu_cs_chunk_sem {
> > > +	__u32 handle;
> > > +};
> > > +
> > >   struct drm_amdgpu_cs_chunk_data {
> > >   	union {
> > >   		struct drm_amdgpu_cs_chunk_ib		ib_data;
> > > -- 
> > > 2.7.4
> > > 
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores
       [not found]     ` <20170314005054.7487-2-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-15 10:30       ` Emil Velikov
  0 siblings, 0 replies; 40+ messages in thread
From: Emil Velikov @ 2017-03-15 10:30 UTC (permalink / raw)
  To: Dave Airlie; +Cc: ML dri-devel, amd-gfx mailing list

Hi Dave,

Setting aside the other discussions, allow me to offer a couple of trivial suggestions:

Please re-wrap the long lines to follow existing code style.

On 14 March 2017 at 00:50, Dave Airlie <airlied@gmail.com> wrote:

> @@ -882,6 +894,12 @@ int amdgpu_cs_submit(amdgpu_context_handle context,
>                      struct amdgpu_cs_request *ibs_request,
>                      uint32_t number_of_requests);
>
> +int amdgpu_cs_submit_sem(amdgpu_context_handle context,
> +                        uint64_t flags,
> +                        struct amdgpu_cs_request *ibs_request,
> +                        struct amdgpu_cs_request_sem *ibs_sem,
> +                        uint32_t number_of_requests);
> +
>  /**
>   *  Query status of Command Buffer Submission
>   *
> @@ -1255,4 +1273,14 @@ int amdgpu_cs_destroy_semaphore(amdgpu_semaphore_handle sem);
>  */
>  const char *amdgpu_get_marketing_name(amdgpu_device_handle dev);
>
> +int amdgpu_cs_create_sem(amdgpu_device_handle dev,
> +                        amdgpu_sem_handle *sem);
> +int amdgpu_cs_export_sem(amdgpu_device_handle dev,
> +                         amdgpu_sem_handle sem,
> +                        int *shared_handle);
> +int amdgpu_cs_import_sem(amdgpu_device_handle dev,
> +                         int shared_handle,
> +                        amdgpu_sem_handle *sem);
> +int amdgpu_cs_destroy_sem(amdgpu_device_handle dev,
> +                         amdgpu_sem_handle sem);
The new symbols should be added to the amdgpu-symbol-check test.
If in doubt - run `make -C amdgpu check'
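
For what it's worth, a hypothetical round-trip with the new entry points
would look something like this (error handling elided; dev and dev2 stand
for two already-initialised amdgpu_device_handles):

	amdgpu_sem_handle sem, sem2;
	int fd, r;

	r = amdgpu_cs_create_sem(dev, &sem);       /* per-device handle */
	r = amdgpu_cs_export_sem(dev, sem, &fd);   /* fd backing the sem */
	/* pass fd to the other side, e.g. over a unix socket */
	r = amdgpu_cs_import_sem(dev2, fd, &sem2);
	/* ... submit work waiting on / signalling the semaphore ... */
	r = amdgpu_cs_destroy_sem(dev2, sem2);
	r = amdgpu_cs_destroy_sem(dev, sem);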

> --- a/include/drm/amdgpu_drm.h
> +++ b/include/drm/amdgpu_drm.h
Please sync this as PATCH 1/2 via "make headers_install" + cp + git
commit -asm "....Generated using make headers_install.\nGenerated from
$tree/branch commit $sha."

There's a handful of other changes that are missing/should be merged.

> @@ -50,6 +50,7 @@ extern "C" {

> +struct drm_amdgpu_cs_chunk_sem {
> +       uint32_t handle;
> +};
> +
Seems unused in the UAPI header - might want to add a note?
Also sizeof(struct drm_amdgpu_cs_chunk_sem) is not a multiple of 64 bits -
worth mentioning whether that's safe and/or why?
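
Purely as an illustration of one way to answer that (not what the patch
does), explicit padding would make the chunk a 64-bit multiple:

	struct drm_amdgpu_cs_chunk_sem {
		__u32 handle;
		__u32 pad;	/* unused, must be zero */
	};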

Thanks
Emil
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/4] sync_file: add replace and export some functionality
  2017-03-15  8:55             ` Daniel Vetter
@ 2017-03-16  5:18               ` Dave Airlie
  0 siblings, 0 replies; 40+ messages in thread
From: Dave Airlie @ 2017-03-16  5:18 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel, amd-gfx mailing list

On 15 March 2017 at 18:55, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Mar 15, 2017 at 02:19:16PM +1000, Dave Airlie wrote:
>> >
>> > uabi semantics question: Should we wake up everyone when the fence gets
>> > replaced? What's the khr semaphore expectation here?
>>
>> There are no real semantics for this case: you either wait on the semaphore,
>> replacing its fence with NULL, or signal it, replacing the fence with a new one.
>>
>> Nobody should be using the sync_fence interfaces to poll on these fence fds.
>>
>> So not crashing for non-vulkan behaviour is what we need.
>>
>> The spec doesn't know anything other than that this is an opaque fd;
>> it shouldn't be doing operations on it, except importing it.
>>
>> >>  static int sync_file_set_fence(struct sync_file *sync_file,
>> >>                              struct dma_fence **fences, int num_fences)
>> >>  {
>> >> @@ -292,8 +328,10 @@ static void sync_file_free(struct kref *kref)
>> >>       struct sync_file *sync_file = container_of(kref, struct sync_file,
>> >>                                                    kref);
>> >>
>> >> -     if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
>> >> -             dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
>> >> +     if (sync_file->fence) {
>> >> +             if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
>> >> +                     dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
>> >> +     }
>> >>       dma_fence_put(sync_file->fence);
>> >>       kfree(sync_file);
>> >>  }
>> >
>> > I think you've missed _poll, won't that oops now?
>>
>> I don't think it will; everything we do inside the poll handler is
>> protected by the mutex.
>> Or do you think we should hook the new fence if the old fence has poll enabled?
>
> Yeah, or at least wake them up somehow and restart it. Or, well, at least
> think about what would happen when it does, and whether we care. If vk says
> you get undefined behaviour when you replace the fence of a semaphore when
> the old one hasn't signalled yet, then we don't need to do anything.

In VK a semaphore is an object; there are no operations on it that touch the
fence. The sync_file implementation is there to support passing the fd backing
the semaphore between vulkan processes, and maybe later GL processes. Nothing
else is defined and the rest is left as an implementation detail. We just need
to protect against someone doing something stupid with the sync_file fd;
currently, replacing is a kernel-internal operation that only happens when you
signal or wait. So yes, I expect you could export a sem, import it, poll it
when it has never been signaled (which is undefined), export it, import it,
signal it, then poll on it; and if someone then waits on it the poll is
probably going to have issues. This behaviour is totally outside the vulkan
spec.

Dave.
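
For reference, the two defined operations boil down to a fence replace in
each direction; a sketch condensed from amdgpu_sem_lookup_and_signal() and
amdgpu_sem_lookup_and_sync() in patch 4/4, with lookup and error handling
elided:

	/* signal: publish the submission's fence into the semaphore */
	old_fence = sync_file_replace_fence(sync_file, fence);
	dma_fence_put(old_fence);

	/* wait: consume the payload, leaving the semaphore empty again */
	fence = sync_file_replace_fence(sync_file, NULL);
	r = amdgpu_sync_fence(adev, sync, fence);
	dma_fence_put(fence);
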
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 3/4] amdgpu/cs: split out fence dependency checking
       [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-20  7:03   ` Dave Airlie
  0 siblings, 0 replies; 40+ messages in thread
From: Dave Airlie @ 2017-03-20  7:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Dave Airlie <airlied@redhat.com>

This just splits out the fence dependency checking into its
own function to make it easier to add semaphore dependencies.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 +++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 99424cb..4671432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -963,56 +963,66 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
+				    struct amdgpu_cs_parser *p,
+				    struct amdgpu_cs_chunk *chunk)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		r = amdgpu_cs_get_ring(adev, deps[i].ip_type,
+				       deps[i].ip_instance,
+				       deps[i].ring, &ring);
+		if (r)
+			return r;
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 				return r;
+		}
+	}
+	return 0;
+}
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_process_fence_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 3/4] amdgpu/cs: split out fence dependency checking
  2017-03-15  4:27 sync_file rcu adoption and semaphore changes Dave Airlie
@ 2017-03-15  4:27 ` Dave Airlie
  0 siblings, 0 replies; 40+ messages in thread
From: Dave Airlie @ 2017-03-15  4:27 UTC (permalink / raw)
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

This just splits out the fence dependency checking into its
own function to make it easier to add semaphore dependencies.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 +++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 99424cb..4671432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -963,56 +963,66 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
+				    struct amdgpu_cs_parser *p,
+				    struct amdgpu_cs_chunk *chunk)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		r = amdgpu_cs_get_ring(adev, deps[i].ip_type,
+				       deps[i].ip_instance,
+				       deps[i].ring, &ring);
+		if (r)
+			return r;
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 				return r;
+		}
+	}
+	return 0;
+}
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_process_fence_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 40+ messages in thread

end of thread

Thread overview: 40+ messages
2017-03-14  0:50 [rfc] amdgpu/sync_file shared semaphores Dave Airlie
2017-03-14  0:50 ` [PATCH 1/4] sync_file: add a mutex to protect fence and callback members Dave Airlie
     [not found]   ` <20170314005054.7487-3-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-14  8:45     ` Daniel Vetter
2017-03-14  9:13       ` Christian König
     [not found]         ` <2cba21f6-731b-d112-f882-bef66a9b96c9-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-14  9:29           ` Chris Wilson
     [not found]             ` <20170314092909.GE2118-aII6DKEyn0pWYbfKqPwjAkR8Iwp7RQ6xAL8bYrjMMd8@public.gmane.org>
2017-03-14  9:30               ` Christian König
2017-03-15  0:47                 ` Dave Airlie
     [not found]                   ` <CAPM=9twwjgBirFDenUmnorDdOZNiWHuzdBxawCiU7p9uS1o0_Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-15  2:20                     ` Dave Airlie
     [not found] ` <20170314005054.7487-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-14  0:50   ` [PATCH libdrm] libdrm/amdgpu: add interface for kernel semaphores Dave Airlie
2017-03-14  2:00     ` zhoucm1
2017-03-14  2:52       ` Dave Airlie
     [not found]         ` <CAPM=9tzegdtiyEmbW+PqmVeQ+bXrNtrdKXuE3kB0dPMKnMir+Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-14  3:30           ` zhoucm1
     [not found]             ` <58C763E0.1-5C7GfCeVMHo@public.gmane.org>
2017-03-14  4:16               ` Dave Airlie
2017-03-14  8:56                 ` Daniel Vetter
2017-03-14  8:54             ` Daniel Vetter
2017-03-14  9:20               ` Christian König
2017-03-14 10:04                 ` zhoucm1
     [not found]                   ` <58C7C032.7060706-5C7GfCeVMHo@public.gmane.org>
2017-03-14 17:45                     ` Harry Wentland
     [not found]                       ` <8dea3303-480d-c29a-22ec-42adf3dab4ce-5C7GfCeVMHo@public.gmane.org>
2017-03-14 17:54                         ` Daniel Vetter
2017-03-14 18:10                         ` Christian König
2017-03-15  0:01                           ` Marek Olšák
     [not found]                             ` <CAAxE2A7xRWhkp-OC59iy2i91uj5mtVTHR0uHfDf745L-ixxHcw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-15  8:48                               ` Daniel Vetter
2017-03-15  9:35                                 ` Christian König
     [not found]     ` <20170314005054.7487-2-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-15 10:30       ` Emil Velikov
2017-03-14  0:50   ` [PATCH 2/4] sync_file: add replace and export some functionality Dave Airlie
     [not found]     ` <20170314005054.7487-4-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-14  8:48       ` Daniel Vetter
     [not found]         ` <20170314084851.covfl7sjwn3yadh5-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2017-03-15  4:19           ` Dave Airlie
2017-03-15  8:55             ` Daniel Vetter
2017-03-16  5:18               ` Dave Airlie
2017-03-14  9:00     ` Chris Wilson
2017-03-14  0:50   ` [PATCH 3/4] amdgpu/cs: split out fence dependency checking Dave Airlie
2017-03-14  0:50   ` [PATCH 4/4] amdgpu: use sync file for shared semaphores Dave Airlie
     [not found]     ` <20170314005054.7487-6-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-15  9:01       ` Daniel Vetter
     [not found]         ` <20170315090129.fneo5gafrz2jec32-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2017-03-15  9:43           ` Christian König
2017-03-15  9:57             ` Daniel Vetter
2017-03-14  8:53 ` [rfc] amdgpu/sync_file " Daniel Vetter
     [not found]   ` <20170314085313.qmnnofqa6e6ozmwk-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2017-03-15  1:09     ` Dave Airlie
     [not found]       ` <CAPM=9txSFTkz0y5hamBA7U7fu+X8x_wHG+X3xtWvm2PY4aCwsw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-03-15  8:47         ` Daniel Vetter
2017-03-15  4:27 sync_file rcu adoption and semaphore changes Dave Airlie
2017-03-15  4:27 ` [PATCH 3/4] amdgpu/cs: split out fence dependency checking Dave Airlie
2017-03-20  7:03 [rfc/repost] amdgpu/sync_file shared semaphores Dave Airlie
     [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-20  7:03   ` [PATCH 3/4] amdgpu/cs: split out fence dependency checking Dave Airlie
