All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2
@ 2018-12-07  9:55 Chunming Zhou
  2018-12-07  9:55 ` [PATCH 02/11] dma-buf: add new dma_fence_chain container v4 Chunming Zhou
                   ` (5 more replies)
  0 siblings, 6 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König, Christian König

From: Christian König <ckoenig.leichtzumerken@gmail.com>

For a lot of use cases we need 64bit sequence numbers. Currently drivers
overload the dma_fence structure to store the additional bits.

Stop doing that and make the sequence number in the dma_fence always
64bit.

For compatibility with hardware which can only do 32bit sequences, the
comparison in __dma_fence_is_later only takes the lower 32bits as significant
when the upper 32bits of both sequence numbers are all zero.

v2: change the logic in __dma_fence_is_later

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-fence.c            |  2 +-
 drivers/dma-buf/sw_sync.c              |  2 +-
 drivers/dma-buf/sync_file.c            |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c |  2 +-
 drivers/gpu/drm/i915/i915_sw_fence.c   |  2 +-
 drivers/gpu/drm/i915/intel_engine_cs.c |  2 +-
 drivers/gpu/drm/vgem/vgem_fence.c      |  4 ++--
 include/linux/dma-fence.h              | 22 +++++++++++++++-------
 8 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 136ec04d683f..3aa8733f832a 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -649,7 +649,7 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout);
  */
 void
 dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
-	       spinlock_t *lock, u64 context, unsigned seqno)
+	       spinlock_t *lock, u64 context, u64 seqno)
 {
 	BUG_ON(!lock);
 	BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 53c1d6d36a64..32dcf7b4c935 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -172,7 +172,7 @@ static bool timeline_fence_enable_signaling(struct dma_fence *fence)
 static void timeline_fence_value_str(struct dma_fence *fence,
 				    char *str, int size)
 {
-	snprintf(str, size, "%d", fence->seqno);
+	snprintf(str, size, "%lld", fence->seqno);
 }
 
 static void timeline_fence_timeline_value_str(struct dma_fence *fence,
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 35dd06479867..4f6305ca52c8 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -144,7 +144,7 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
 	} else {
 		struct dma_fence *fence = sync_file->fence;
 
-		snprintf(buf, len, "%s-%s%llu-%d",
+		snprintf(buf, len, "%s-%s%llu-%lld",
 			 fence->ops->get_driver_name(fence),
 			 fence->ops->get_timeline_name(fence),
 			 fence->context,
@@ -258,7 +258,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 
 			i_b++;
 		} else {
-			if (pt_a->seqno - pt_b->seqno <= INT_MAX)
+			if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno))
 				add_fence(fences, &i, pt_a);
 			else
 				add_fence(fences, &i, pt_b);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 12f2bf97611f..bfaf5c6323be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -388,7 +388,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 			   soffset, eoffset, eoffset - soffset);
 
 		if (i->fence)
-			seq_printf(m, " protected by 0x%08x on context %llu",
+			seq_printf(m, " protected by 0x%016llx on context %llu",
 				   i->fence->seqno, i->fence->context);
 
 		seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 6dbeed079ae5..11bcdabd5177 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -393,7 +393,7 @@ static void timer_i915_sw_fence_wake(struct timer_list *t)
 	if (!fence)
 		return;
 
-	pr_notice("Asynchronous wait on fence %s:%s:%x timed out (hint:%pS)\n",
+	pr_notice("Asynchronous wait on fence %s:%s:%llx timed out (hint:%pS)\n",
 		  cb->dma->ops->get_driver_name(cb->dma),
 		  cb->dma->ops->get_timeline_name(cb->dma),
 		  cb->dma->seqno,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 759c0fd58f8c..dfafa79171df 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1239,7 +1239,7 @@ static void print_request(struct drm_printer *m,
 
 	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
 
-	drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n",
+	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
 		   prefix,
 		   rq->global_seqno,
 		   i915_request_completed(rq) ? "!" : "",
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
index c1c420afe2dd..eb17c0cd3727 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -53,13 +53,13 @@ static void vgem_fence_release(struct dma_fence *base)
 
 static void vgem_fence_value_str(struct dma_fence *fence, char *str, int size)
 {
-	snprintf(str, size, "%u", fence->seqno);
+	snprintf(str, size, "%llu", fence->seqno);
 }
 
 static void vgem_fence_timeline_value_str(struct dma_fence *fence, char *str,
 					  int size)
 {
-	snprintf(str, size, "%u",
+	snprintf(str, size, "%llu",
 		 dma_fence_is_signaled(fence) ? fence->seqno : 0);
 }
 
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 999e4b104410..6b788467b2e3 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -77,7 +77,7 @@ struct dma_fence {
 	struct list_head cb_list;
 	spinlock_t *lock;
 	u64 context;
-	unsigned seqno;
+	u64 seqno;
 	unsigned long flags;
 	ktime_t timestamp;
 	int error;
@@ -244,7 +244,7 @@ struct dma_fence_ops {
 };
 
 void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
-		    spinlock_t *lock, u64 context, unsigned seqno);
+		    spinlock_t *lock, u64 context, u64 seqno);
 
 void dma_fence_release(struct kref *kref);
 void dma_fence_free(struct dma_fence *fence);
@@ -414,9 +414,17 @@ dma_fence_is_signaled(struct dma_fence *fence)
  * Returns true if f1 is chronologically later than f2. Both fences must be
  * from the same context, since a seqno is not common across contexts.
  */
-static inline bool __dma_fence_is_later(u32 f1, u32 f2)
+static inline bool __dma_fence_is_later(u64 f1, u64 f2)
 {
-	return (int)(f1 - f2) > 0;
+	/* This is for backward compatibility with drivers which can only handle
+	 * 32bit sequence numbers. Use a 64bit compare when any of the higher
+	 * bits are non-zero, otherwise use a 32bit compare with wrap around
+	 * handling.
+	 */
+	if (upper_32_bits(f1) || upper_32_bits(f2))
+		return f1 > f2;
+
+	return (int)(lower_32_bits(f1) - lower_32_bits(f2)) > 0;
 }
 
 /**
@@ -548,21 +556,21 @@ u64 dma_fence_context_alloc(unsigned num);
 	do {								\
 		struct dma_fence *__ff = (f);				\
 		if (IS_ENABLED(CONFIG_DMA_FENCE_TRACE))			\
-			pr_info("f %llu#%u: " fmt,			\
+			pr_info("f %llu#%llu: " fmt,			\
 				__ff->context, __ff->seqno, ##args);	\
 	} while (0)
 
 #define DMA_FENCE_WARN(f, fmt, args...) \
 	do {								\
 		struct dma_fence *__ff = (f);				\
-		pr_warn("f %llu#%u: " fmt, __ff->context, __ff->seqno,	\
+		pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
 			 ##args);					\
 	} while (0)
 
 #define DMA_FENCE_ERR(f, fmt, args...) \
 	do {								\
 		struct dma_fence *__ff = (f);				\
-		pr_err("f %llu#%u: " fmt, __ff->context, __ff->seqno,	\
+		pr_err("f %llu#%llu: " fmt, __ff->context, __ff->seqno,	\
 			##args);					\
 	} while (0)
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
  2018-12-07  9:55 [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2 Chunming Zhou
@ 2018-12-07  9:55 ` Chunming Zhou
  2019-02-15 14:23   ` Lionel Landwerlin via dri-devel
  2018-12-07  9:55 ` [PATCH 03/11] drm/syncobj: remove drm_syncobj_cb and cleanup Chunming Zhou
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig, dri-devel, amd-gfx
  Cc: Christian König, Christian König

From: Christian König <ckoenig.leichtzumerken@gmail.com>

Lockless container implementation similar to a dma_fence_array, but with
only two elements per node and automatic garbage collection.

v2: properly document dma_fence_chain_for_each, add dma_fence_chain_find_seqno,
    drop prev reference during garbage collection if it's not a chain fence.
v3: use head and iterator for dma_fence_chain_for_each
v4: fix reference count in dma_fence_chain_enable_signaling

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/Makefile          |   3 +-
 drivers/dma-buf/dma-fence-chain.c | 241 ++++++++++++++++++++++++++++++
 include/linux/dma-fence-chain.h   |  81 ++++++++++
 3 files changed, 324 insertions(+), 1 deletion(-)
 create mode 100644 drivers/dma-buf/dma-fence-chain.c
 create mode 100644 include/linux/dma-fence-chain.h

diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index 0913a6ccab5a..1f006e083eb9 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,4 +1,5 @@
-obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
+obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
+	 reservation.o seqno-fence.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
 obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
 obj-$(CONFIG_UDMABUF)		+= udmabuf.o
diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
new file mode 100644
index 000000000000..0c5e3c902fa0
--- /dev/null
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -0,0 +1,241 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/dma-fence-chain.h>
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
+
+/**
+ * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
+ * @chain: chain node to get the previous node from
+ *
+ * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
+ * chain node.
+ */
+static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain)
+{
+	struct dma_fence *prev;
+
+	rcu_read_lock();
+	prev = dma_fence_get_rcu_safe(&chain->prev);
+	rcu_read_unlock();
+	return prev;
+}
+
+/**
+ * dma_fence_chain_walk - chain walking function
+ * @fence: current chain node
+ *
+ * Walk the chain to the next node. Returns the next fence or NULL if we are at
+ * the end of the chain. Garbage collects chain nodes which are already
+ * signaled.
+ */
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
+{
+	struct dma_fence_chain *chain, *prev_chain;
+	struct dma_fence *prev, *replacement, *tmp;
+
+	chain = to_dma_fence_chain(fence);
+	if (!chain) {
+		dma_fence_put(fence);
+		return NULL;
+	}
+
+	while ((prev = dma_fence_chain_get_prev(chain))) {
+
+		prev_chain = to_dma_fence_chain(prev);
+		if (prev_chain) {
+			if (!dma_fence_is_signaled(prev_chain->fence))
+				break;
+
+			replacement = dma_fence_chain_get_prev(prev_chain);
+		} else {
+			if (!dma_fence_is_signaled(prev))
+				break;
+
+			replacement = NULL;
+		}
+
+		tmp = cmpxchg(&chain->prev, prev, replacement);
+		if (tmp == prev)
+			dma_fence_put(tmp);
+		else
+			dma_fence_put(replacement);
+		dma_fence_put(prev);
+	}
+
+	dma_fence_put(fence);
+	return prev;
+}
+EXPORT_SYMBOL(dma_fence_chain_walk);
+
+/**
+ * dma_fence_chain_find_seqno - find fence chain node by seqno
+ * @pfence: pointer to the chain node where to start
+ * @seqno: the sequence number to search for
+ *
+ * Advance the fence pointer to the chain node which will signal this sequence
+ * number. If no sequence number is provided then this is a no-op.
+ *
+ * Returns -EINVAL if the fence is not a chain node or the sequence number has
+ * not yet advanced far enough.
+ */
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
+{
+	struct dma_fence_chain *chain;
+
+	if (!seqno)
+		return 0;
+
+	chain = to_dma_fence_chain(*pfence);
+	if (!chain || chain->base.seqno < seqno)
+		return -EINVAL;
+
+	dma_fence_chain_for_each(*pfence, &chain->base) {
+		if ((*pfence)->context != chain->base.context ||
+		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
+			break;
+	}
+	dma_fence_put(&chain->base);
+
+	return 0;
+}
+EXPORT_SYMBOL(dma_fence_chain_find_seqno);
+
+static const char *dma_fence_chain_get_driver_name(struct dma_fence *fence)
+{
+        return "dma_fence_chain";
+}
+
+static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence)
+{
+        return "unbound";
+}
+
+static void dma_fence_chain_irq_work(struct irq_work *work)
+{
+	struct dma_fence_chain *chain;
+
+	chain = container_of(work, typeof(*chain), work);
+
+	/* Try to rearm the callback */
+	if (!dma_fence_chain_enable_signaling(&chain->base))
+		/* Ok, we are done. No more unsignaled fences left */
+		dma_fence_signal(&chain->base);
+	dma_fence_put(&chain->base);
+}
+
+static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
+{
+	struct dma_fence_chain *chain;
+
+	chain = container_of(cb, typeof(*chain), cb);
+	irq_work_queue(&chain->work);
+	dma_fence_put(f);
+}
+
+static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
+{
+	struct dma_fence_chain *head = to_dma_fence_chain(fence);
+
+	dma_fence_get(&head->base);
+	dma_fence_chain_for_each(fence, &head->base) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+		struct dma_fence *f = chain ? chain->fence : fence;
+
+		dma_fence_get(f);
+		if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
+			dma_fence_put(fence);
+			return true;
+		}
+		dma_fence_put(f);
+	}
+	dma_fence_put(&head->base);
+	return false;
+}
+
+static bool dma_fence_chain_signaled(struct dma_fence *fence)
+{
+	dma_fence_chain_for_each(fence, fence) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+		struct dma_fence *f = chain ? chain->fence : fence;
+
+		if (!dma_fence_is_signaled(f)) {
+			dma_fence_put(fence);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static void dma_fence_chain_release(struct dma_fence *fence)
+{
+	struct dma_fence_chain *chain = to_dma_fence_chain(fence);
+
+	dma_fence_put(chain->prev);
+	dma_fence_put(chain->fence);
+	dma_fence_free(fence);
+}
+
+const struct dma_fence_ops dma_fence_chain_ops = {
+	.get_driver_name = dma_fence_chain_get_driver_name,
+	.get_timeline_name = dma_fence_chain_get_timeline_name,
+	.enable_signaling = dma_fence_chain_enable_signaling,
+	.signaled = dma_fence_chain_signaled,
+	.release = dma_fence_chain_release,
+};
+EXPORT_SYMBOL(dma_fence_chain_ops);
+
+/**
+ * dma_fence_chain_init - initialize a fence chain
+ * @chain: the chain node to initialize
+ * @prev: the previous fence
+ * @fence: the current fence
+ *
+ * Initialize a new chain node and either start a new chain or add the node to
+ * the existing chain of the previous fence.
+ */
+void dma_fence_chain_init(struct dma_fence_chain *chain,
+			  struct dma_fence *prev,
+			  struct dma_fence *fence,
+			  uint64_t seqno)
+{
+	struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
+	uint64_t context;
+
+	spin_lock_init(&chain->lock);
+	chain->prev = prev;
+	chain->fence = fence;
+	chain->prev_seqno = 0;
+	init_irq_work(&chain->work, dma_fence_chain_irq_work);
+
+	/* Try to reuse the context of the previous chain node. */
+	if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
+		context = prev->context;
+		chain->prev_seqno = prev->seqno;
+	} else {
+		context = dma_fence_context_alloc(1);
+		/* Make sure that we always have a valid sequence number. */
+		if (prev_chain)
+			seqno = max(prev->seqno, seqno);
+	}
+
+	dma_fence_init(&chain->base, &dma_fence_chain_ops,
+		       &chain->lock, context, seqno);
+}
+EXPORT_SYMBOL(dma_fence_chain_init);
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
new file mode 100644
index 000000000000..a5c2e8c6915c
--- /dev/null
+++ b/include/linux/dma-fence-chain.h
@@ -0,0 +1,81 @@
+/*
+ * fence-chain: chain fences together in a timeline
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __LINUX_DMA_FENCE_CHAIN_H
+#define __LINUX_DMA_FENCE_CHAIN_H
+
+#include <linux/dma-fence.h>
+#include <linux/irq_work.h>
+
+/**
+ * struct dma_fence_chain - fence to represent a node of a fence chain
+ * @base: fence base class
+ * @lock: spinlock for fence handling
+ * @prev: previous fence of the chain
+ * @prev_seqno: original previous seqno before garbage collection
+ * @fence: encapsulated fence
+ * @cb: callback structure for signaling
+ * @work: irq work item for signaling
+ */
+struct dma_fence_chain {
+	struct dma_fence base;
+	spinlock_t lock;
+	struct dma_fence *prev;
+	u64 prev_seqno;
+	struct dma_fence *fence;
+	struct dma_fence_cb cb;
+	struct irq_work work;
+};
+
+extern const struct dma_fence_ops dma_fence_chain_ops;
+
+/**
+ * to_dma_fence_chain - cast a fence to a dma_fence_chain
+ * @fence: fence to cast to a dma_fence_chain
+ *
+ * Returns NULL if the fence is not a dma_fence_chain,
+ * or the dma_fence_chain otherwise.
+ */
+static inline struct dma_fence_chain *
+to_dma_fence_chain(struct dma_fence *fence)
+{
+	if (!fence || fence->ops != &dma_fence_chain_ops)
+		return NULL;
+
+	return container_of(fence, struct dma_fence_chain, base);
+}
+
+/**
+ * dma_fence_chain_for_each - iterate over all fences in chain
+ * @iter: current fence
+ * @head: starting point
+ *
+ * Iterate over all fences in the chain. We keep a reference to the current
+ * fence while inside the loop which must be dropped when breaking out.
+ */
+#define dma_fence_chain_for_each(iter, head)	\
+	for (iter = dma_fence_get(head); iter; \
+	     iter = dma_fence_chain_walk(head))
+
+struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
+int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno);
+void dma_fence_chain_init(struct dma_fence_chain *chain,
+			  struct dma_fence *prev,
+			  struct dma_fence *fence,
+			  uint64_t seqno);
+
+#endif /* __LINUX_DMA_FENCE_CHAIN_H */
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 03/11] drm/syncobj: remove drm_syncobj_cb and cleanup
  2018-12-07  9:55 [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2 Chunming Zhou
  2018-12-07  9:55 ` [PATCH 02/11] dma-buf: add new dma_fence_chain container v4 Chunming Zhou
@ 2018-12-07  9:55 ` Chunming Zhou
  2018-12-07  9:55 ` [PATCH 05/11] drm/syncobj: add support for timeline point wait v8 Chunming Zhou
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig, dri-devel, amd-gfx
  Cc: Christian König, Christian König

From: Christian König <ckoenig.leichtzumerken@gmail.com>

This completes "drm/syncobj: Drop add/remove_callback from driver
interface" and cleans up the implementation a bit.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_syncobj.c | 91 ++++++++++++-----------------------
 include/drm/drm_syncobj.h     | 21 --------
 2 files changed, 30 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index db30a0e89db8..e19525af0cce 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -56,6 +56,16 @@
 #include "drm_internal.h"
 #include <drm/drm_syncobj.h>
 
+struct syncobj_wait_entry {
+	struct list_head node;
+	struct task_struct *task;
+	struct dma_fence *fence;
+	struct dma_fence_cb fence_cb;
+};
+
+static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
+				      struct syncobj_wait_entry *wait);
+
 /**
  * drm_syncobj_find - lookup and reference a sync object.
  * @file_private: drm file private pointer
@@ -82,58 +92,33 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 }
 EXPORT_SYMBOL(drm_syncobj_find);
 
-static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj,
-					    struct drm_syncobj_cb *cb,
-					    drm_syncobj_func_t func)
+static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
+				       struct syncobj_wait_entry *wait)
 {
-	cb->func = func;
-	list_add_tail(&cb->node, &syncobj->cb_list);
-}
-
-static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj,
-						 struct dma_fence **fence,
-						 struct drm_syncobj_cb *cb,
-						 drm_syncobj_func_t func)
-{
-	int ret;
-
-	*fence = drm_syncobj_fence_get(syncobj);
-	if (*fence)
-		return 1;
+	if (wait->fence)
+		return;
 
 	spin_lock(&syncobj->lock);
 	/* We've already tried once to get a fence and failed.  Now that we
 	 * have the lock, try one more time just to be sure we don't add a
 	 * callback when a fence has already been set.
 	 */
-	if (syncobj->fence) {
-		*fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
-								 lockdep_is_held(&syncobj->lock)));
-		ret = 1;
-	} else {
-		*fence = NULL;
-		drm_syncobj_add_callback_locked(syncobj, cb, func);
-		ret = 0;
-	}
+	if (syncobj->fence)
+		wait->fence = dma_fence_get(
+			rcu_dereference_protected(syncobj->fence, 1));
+	else
+		list_add_tail(&wait->node, &syncobj->cb_list);
 	spin_unlock(&syncobj->lock);
-
-	return ret;
 }
 
-void drm_syncobj_add_callback(struct drm_syncobj *syncobj,
-			      struct drm_syncobj_cb *cb,
-			      drm_syncobj_func_t func)
+static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj,
+				    struct syncobj_wait_entry *wait)
 {
-	spin_lock(&syncobj->lock);
-	drm_syncobj_add_callback_locked(syncobj, cb, func);
-	spin_unlock(&syncobj->lock);
-}
+	if (!wait->node.next)
+		return;
 
-void drm_syncobj_remove_callback(struct drm_syncobj *syncobj,
-				 struct drm_syncobj_cb *cb)
-{
 	spin_lock(&syncobj->lock);
-	list_del_init(&cb->node);
+	list_del_init(&wait->node);
 	spin_unlock(&syncobj->lock);
 }
 
@@ -148,7 +133,7 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence)
 {
 	struct dma_fence *old_fence;
-	struct drm_syncobj_cb *cur, *tmp;
+	struct syncobj_wait_entry *cur, *tmp;
 
 	if (fence)
 		dma_fence_get(fence);
@@ -162,7 +147,7 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	if (fence != old_fence) {
 		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
 			list_del_init(&cur->node);
-			cur->func(syncobj, cur);
+			syncobj_wait_syncobj_func(syncobj, cur);
 		}
 	}
 
@@ -608,13 +593,6 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 					&args->handle);
 }
 
-struct syncobj_wait_entry {
-	struct task_struct *task;
-	struct dma_fence *fence;
-	struct dma_fence_cb fence_cb;
-	struct drm_syncobj_cb syncobj_cb;
-};
-
 static void syncobj_wait_fence_func(struct dma_fence *fence,
 				    struct dma_fence_cb *cb)
 {
@@ -625,11 +603,8 @@ static void syncobj_wait_fence_func(struct dma_fence *fence,
 }
 
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
-				      struct drm_syncobj_cb *cb)
+				      struct syncobj_wait_entry *wait)
 {
-	struct syncobj_wait_entry *wait =
-		container_of(cb, struct syncobj_wait_entry, syncobj_cb);
-
 	/* This happens inside the syncobj lock */
 	wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
 							      lockdep_is_held(&syncobj->lock)));
@@ -688,12 +663,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	 */
 
 	if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
-		for (i = 0; i < count; ++i) {
-			drm_syncobj_fence_get_or_add_callback(syncobjs[i],
-							      &entries[i].fence,
-							      &entries[i].syncobj_cb,
-							      syncobj_wait_syncobj_func);
-		}
+		for (i = 0; i < count; ++i)
+			drm_syncobj_fence_add_wait(syncobjs[i], &entries[i]);
 	}
 
 	do {
@@ -742,9 +713,7 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 
 cleanup_entries:
 	for (i = 0; i < count; ++i) {
-		if (entries[i].syncobj_cb.func)
-			drm_syncobj_remove_callback(syncobjs[i],
-						    &entries[i].syncobj_cb);
+		drm_syncobj_remove_wait(syncobjs[i], &entries[i]);
 		if (entries[i].fence_cb.func)
 			dma_fence_remove_callback(entries[i].fence,
 						  &entries[i].fence_cb);
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index b1fe921f8e8f..7c6ed845c70d 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -28,8 +28,6 @@
 
 #include "linux/dma-fence.h"
 
-struct drm_syncobj_cb;
-
 /**
  * struct drm_syncobj - sync object.
  *
@@ -62,25 +60,6 @@ struct drm_syncobj {
 	struct file *file;
 };
 
-typedef void (*drm_syncobj_func_t)(struct drm_syncobj *syncobj,
-				   struct drm_syncobj_cb *cb);
-
-/**
- * struct drm_syncobj_cb - callback for drm_syncobj_add_callback
- * @node: used by drm_syncob_add_callback to append this struct to
- *	  &drm_syncobj.cb_list
- * @func: drm_syncobj_func_t to call
- *
- * This struct will be initialized by drm_syncobj_add_callback, additional
- * data can be passed along by embedding drm_syncobj_cb in another struct.
- * The callback will get called the next time drm_syncobj_replace_fence is
- * called.
- */
-struct drm_syncobj_cb {
-	struct list_head node;
-	drm_syncobj_func_t func;
-};
-
 void drm_syncobj_free(struct kref *kref);
 
 /**
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 04/11] drm/syncobj: add new drm_syncobj_add_point interface v2
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-07  9:55   ` Chunming Zhou
  2018-12-07  9:55   ` [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4 Chunming Zhou
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König, Christian König

From: Christian König <ckoenig.leichtzumerken@gmail.com>

Use the dma_fence_chain object to create a timeline of fence objects
instead of just replacing the existing fence.

v2: rebase and cleanup

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_syncobj.c | 37 +++++++++++++++++++++++++++++++++++
 include/drm/drm_syncobj.h     |  5 +++++
 2 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index e19525af0cce..51f798e2194f 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -122,6 +122,43 @@ static void drm_syncobj_remove_wait(struct drm_syncobj *syncobj,
 	spin_unlock(&syncobj->lock);
 }
 
+/**
+ * drm_syncobj_add_point - add new timeline point to the syncobj
+ * @syncobj: sync object to add timeline point to
+ * @chain: chain node to use to add the point
+ * @fence: fence to encapsulate in the chain node
+ * @point: sequence number to use for the point
+ *
+ * Add the chain node as new timeline point to the syncobj.
+ */
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+			   struct dma_fence_chain *chain,
+			   struct dma_fence *fence,
+			   uint64_t point)
+{
+	struct syncobj_wait_entry *cur, *tmp;
+	struct dma_fence *prev;
+
+	dma_fence_get(fence);
+
+	spin_lock(&syncobj->lock);
+
+	prev = rcu_dereference_protected(syncobj->fence,
+					 lockdep_is_held(&syncobj->lock));
+	dma_fence_chain_init(chain, prev, fence, point);
+	rcu_assign_pointer(syncobj->fence, &chain->base);
+
+	list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
+		list_del_init(&cur->node);
+		syncobj_wait_syncobj_func(syncobj, cur);
+	}
+	spin_unlock(&syncobj->lock);
+
+	/* Walk the chain once to trigger garbage collection */
+	dma_fence_chain_for_each(prev, fence);
+}
+EXPORT_SYMBOL(drm_syncobj_add_point);
+
 /**
  * drm_syncobj_replace_fence - replace fence in a sync object.
  * @syncobj: Sync object to replace fence in
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 7c6ed845c70d..8acb4ae4f311 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -27,6 +27,7 @@
 #define __DRM_SYNCOBJ_H__
 
 #include "linux/dma-fence.h"
+#include "linux/dma-fence-chain.h"
 
 /**
  * struct drm_syncobj - sync object.
@@ -110,6 +111,10 @@ drm_syncobj_fence_get(struct drm_syncobj *syncobj)
 
 struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private,
 				     u32 handle);
+void drm_syncobj_add_point(struct drm_syncobj *syncobj,
+			   struct dma_fence_chain *chain,
+			   struct dma_fence *fence,
+			   uint64_t point);
 void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 			       struct dma_fence *fence);
 int drm_syncobj_find_fence(struct drm_file *file_private,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 05/11] drm/syncobj: add support for timeline point wait v8
  2018-12-07  9:55 [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2 Chunming Zhou
  2018-12-07  9:55 ` [PATCH 02/11] dma-buf: add new dma_fence_chain container v4 Chunming Zhou
  2018-12-07  9:55 ` [PATCH 03/11] drm/syncobj: remove drm_syncobj_cb and cleanup Chunming Zhou
@ 2018-12-07  9:55 ` Chunming Zhou
  2018-12-07  9:55 ` [PATCH 08/11] drm/amdgpu: add timeline support in amdgpu CS v2 Chunming Zhou
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig, dri-devel, amd-gfx
  Cc: Daniel Rakos, Jason Ekstrand, Dave Airlie, Christian König

The points array matches the syncobjs array one-to-one.
v2:
add a separate ioctl for timeline point wait, otherwise we would break the uapi.
v3:
userspace can specify two kinds of waits:
a. wait for the time point to be completed.
b. wait for the time point to become available.
v4:
rebase
v5:
add comment for xxx_WAIT_AVAILABLE
v6: rebase and rework on new container
v7: drop _WAIT_COMPLETED, it is the default anyway
v8: correctly handle garbage collected fences

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Cc: Daniel Rakos <Daniel.Rakos@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c    |   2 +
 drivers/gpu/drm/drm_syncobj.c  | 153 ++++++++++++++++++++++++++-------
 include/uapi/drm/drm.h         |  15 ++++
 4 files changed, 143 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index c7a7d7ce5d1c..18b41e10195c 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -178,6 +178,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_private);
+int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_private);
 int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 94bd872d56c4..a9a17ed35cc4 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -675,6 +675,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 51f798e2194f..348079bb0965 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -61,6 +61,7 @@ struct syncobj_wait_entry {
 	struct task_struct *task;
 	struct dma_fence *fence;
 	struct dma_fence_cb fence_cb;
+	u64    point;
 };
 
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
@@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find);
 static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
 				       struct syncobj_wait_entry *wait)
 {
+	struct dma_fence *fence;
+
 	if (wait->fence)
 		return;
 
@@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj,
 	 * have the lock, try one more time just to be sure we don't add a
 	 * callback when a fence has already been set.
 	 */
-	if (syncobj->fence)
-		wait->fence = dma_fence_get(
-			rcu_dereference_protected(syncobj->fence, 1));
-	else
+	fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1));
+	if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
+		dma_fence_put(fence);
 		list_add_tail(&wait->node, &syncobj->cb_list);
+	} else if (!fence) {
+		wait->fence = dma_fence_get_stub();
+	} else {
+		wait->fence = fence;
+	}
 	spin_unlock(&syncobj->lock);
 }
 
@@ -148,10 +155,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
 	dma_fence_chain_init(chain, prev, fence, point);
 	rcu_assign_pointer(syncobj->fence, &chain->base);
 
-	list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-		list_del_init(&cur->node);
+	list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
 		syncobj_wait_syncobj_func(syncobj, cur);
-	}
 	spin_unlock(&syncobj->lock);
 
 	/* Walk the chain once to trigger garbage collection */
@@ -182,10 +187,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj,
 	rcu_assign_pointer(syncobj->fence, fence);
 
 	if (fence != old_fence) {
-		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) {
-			list_del_init(&cur->node);
+		list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node)
 			syncobj_wait_syncobj_func(syncobj, cur);
-		}
 	}
 
 	spin_unlock(&syncobj->lock);
@@ -642,13 +645,27 @@ static void syncobj_wait_fence_func(struct dma_fence *fence,
 static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj,
 				      struct syncobj_wait_entry *wait)
 {
+	struct dma_fence *fence;
+
 	/* This happens inside the syncobj lock */
-	wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence,
-							      lockdep_is_held(&syncobj->lock)));
+	fence = rcu_dereference_protected(syncobj->fence,
+					  lockdep_is_held(&syncobj->lock));
+	dma_fence_get(fence);
+	if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) {
+		dma_fence_put(fence);
+		return;
+	} else if (!fence) {
+		wait->fence = dma_fence_get_stub();
+	} else {
+		wait->fence = fence;
+	}
+
 	wake_up_process(wait->task);
+	list_del_init(&wait->node);
 }
 
 static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
+						  void __user *user_points,
 						  uint32_t count,
 						  uint32_t flags,
 						  signed long timeout,
@@ -656,12 +673,27 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 {
 	struct syncobj_wait_entry *entries;
 	struct dma_fence *fence;
+	uint64_t *points;
 	uint32_t signaled_count, i;
 
-	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
-	if (!entries)
+	points = kmalloc_array(count, sizeof(*points), GFP_KERNEL);
+	if (points == NULL)
 		return -ENOMEM;
 
+	if (!user_points) {
+		memset(points, 0, count * sizeof(uint64_t));
+
+	} else if (copy_from_user(points, user_points,
+				  sizeof(uint64_t) * count)) {
+		timeout = -EFAULT;
+		goto err_free_points;
+	}
+
+	entries = kcalloc(count, sizeof(*entries), GFP_KERNEL);
+	if (!entries) {
+		timeout = -ENOMEM;
+		goto err_free_points;
+	}
 	/* Walk the list of sync objects and initialize entries.  We do
 	 * this up-front so that we can properly return -EINVAL if there is
 	 * a syncobj with a missing fence and then never have the chance of
@@ -669,9 +701,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	 */
 	signaled_count = 0;
 	for (i = 0; i < count; ++i) {
+		struct dma_fence *fence;
+
 		entries[i].task = current;
-		entries[i].fence = drm_syncobj_fence_get(syncobjs[i]);
-		if (!entries[i].fence) {
+		entries[i].point = points[i];
+		fence = drm_syncobj_fence_get(syncobjs[i]);
+		if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) {
+			dma_fence_put(fence);
 			if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) {
 				continue;
 			} else {
@@ -680,7 +716,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			}
 		}
 
-		if (dma_fence_is_signaled(entries[i].fence)) {
+		if (fence)
+			entries[i].fence = fence;
+		else
+			entries[i].fence = dma_fence_get_stub();
+
+		if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
+		    dma_fence_is_signaled(entries[i].fence)) {
 			if (signaled_count == 0 && idx)
 				*idx = i;
 			signaled_count++;
@@ -713,7 +755,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 			if (!fence)
 				continue;
 
-			if (dma_fence_is_signaled(fence) ||
+			if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
+			    dma_fence_is_signaled(fence) ||
 			    (!entries[i].fence_cb.func &&
 			     dma_fence_add_callback(fence,
 						    &entries[i].fence_cb,
@@ -758,6 +801,9 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs,
 	}
 	kfree(entries);
 
+err_free_points:
+	kfree(points);
+
 	return timeout;
 }
 
@@ -796,19 +842,33 @@ static signed long drm_timeout_abs_to_jiffies(int64_t timeout_nsec)
 static int drm_syncobj_array_wait(struct drm_device *dev,
 				  struct drm_file *file_private,
 				  struct drm_syncobj_wait *wait,
-				  struct drm_syncobj **syncobjs)
+				  struct drm_syncobj_timeline_wait *timeline_wait,
+				  struct drm_syncobj **syncobjs, bool timeline)
 {
-	signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
+	signed long timeout = 0;
 	uint32_t first = ~0;
 
-	timeout = drm_syncobj_array_wait_timeout(syncobjs,
-						 wait->count_handles,
-						 wait->flags,
-						 timeout, &first);
-	if (timeout < 0)
-		return timeout;
-
-	wait->first_signaled = first;
+	if (!timeline) {
+		timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec);
+		timeout = drm_syncobj_array_wait_timeout(syncobjs,
+							 NULL,
+							 wait->count_handles,
+							 wait->flags,
+							 timeout, &first);
+		if (timeout < 0)
+			return timeout;
+		wait->first_signaled = first;
+	} else {
+		timeout = drm_timeout_abs_to_jiffies(timeline_wait->timeout_nsec);
+		timeout = drm_syncobj_array_wait_timeout(syncobjs,
+							 u64_to_user_ptr(timeline_wait->points),
+							 timeline_wait->count_handles,
+							 timeline_wait->flags,
+							 timeout, &first);
+		if (timeout < 0)
+			return timeout;
+		timeline_wait->first_signaled = first;
+	}
 	return 0;
 }
 
@@ -894,13 +954,48 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 		return ret;
 
 	ret = drm_syncobj_array_wait(dev, file_private,
-				     args, syncobjs);
+				     args, NULL, syncobjs, false);
 
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
 	return ret;
 }
 
+int
+drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
+				struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_wait *args = data;
+	struct drm_syncobj **syncobjs;
+	int ret = 0;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+
+	if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL |
+			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
+			    DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE))
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	ret = drm_syncobj_array_wait(dev, file_private,
+				     NULL, args, syncobjs, true);
+
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
+
 int
 drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_private)
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 300f336633f2..0092111d002c 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -737,6 +737,7 @@ struct drm_syncobj_handle {
 
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
 struct drm_syncobj_wait {
 	__u64 handles;
 	/* absolute timeout */
@@ -747,6 +748,19 @@ struct drm_syncobj_wait {
 	__u32 pad;
 };
 
+struct drm_syncobj_timeline_wait {
+	__u64 handles;
+	/* wait on specific timeline point for every handles*/
+	__u64 points;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
+
 struct drm_syncobj_array {
 	__u64 handles;
 	__u32 count_handles;
@@ -909,6 +923,7 @@ extern "C" {
 #define DRM_IOCTL_MODE_GET_LEASE	DRM_IOWR(0xC8, struct drm_mode_get_lease)
 #define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
 
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  2018-12-07  9:55   ` [PATCH 04/11] drm/syncobj: add new drm_syncobj_add_point interface v2 Chunming Zhou
@ 2018-12-07  9:55   ` Chunming Zhou
       [not found]     ` <20181207095601.2058-6-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  2018-12-07  9:55   ` [PATCH 07/11] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3 Chunming Zhou
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Chunming Zhou, Chris Wilson, Daniel Rakos, Jason Ekstrand,
	Bas Nieuwenhuizen, Dave Airlie, Christian König

Allow user mode to query the timeline payload.
v2: check return value of copy_to_user
v3: handle querying entry by entry
v4: rebase on new chain container, simplify interface

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Daniel Rakos <Daniel.Rakos@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_internal.h |  2 ++
 drivers/gpu/drm/drm_ioctl.c    |  2 ++
 drivers/gpu/drm/drm_syncobj.c  | 43 ++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         | 10 ++++++++
 4 files changed, 57 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 18b41e10195c..dab4d5936441 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -184,6 +184,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_private);
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_private);
 
 /* drm_framebuffer.c */
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index a9a17ed35cc4..7578ef6dc1d1 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -681,6 +681,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 348079bb0965..f97fa00ca1d0 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1061,3 +1061,46 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 
 	return ret;
 }
+
+int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	uint64_t __user *points = u64_to_user_ptr(args->points);
+	uint32_t i;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence_chain *chain;
+		struct dma_fence *fence;
+		uint64_t point;
+
+		fence = drm_syncobj_fence_get(syncobjs[i]);
+		chain = to_dma_fence_chain(fence);
+		point = chain ? fence->seqno : 0;
+		ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
+		ret = ret ? -EFAULT : 0;
+		if (ret)
+			break;
+	}
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 0092111d002c..b2c36f2b2599 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -767,6 +767,14 @@ struct drm_syncobj_array {
 	__u32 pad;
 };
 
+struct drm_syncobj_timeline_array {
+	__u64 handles;
+	__u64 points;
+	__u32 count_handles;
+	__u32 pad;
+};
+
+
 /* Query current scanout sequence number */
 struct drm_crtc_get_sequence {
 	__u32 crtc_id;		/* requested crtc_id */
@@ -924,6 +932,8 @@ extern "C" {
 #define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
 
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
+#define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+
 /**
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 07/11] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  2018-12-07  9:55   ` [PATCH 04/11] drm/syncobj: add new drm_syncobj_add_point interface v2 Chunming Zhou
  2018-12-07  9:55   ` [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4 Chunming Zhou
@ 2018-12-07  9:55   ` Chunming Zhou
  2018-12-07  9:55   ` [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline Chunming Zhou
  2018-12-07  9:56   ` [PATCH 11/11] drm/amdgpu: update version for timeline syncobj support in amdgpu Chunming Zhou
  4 siblings, 0 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König, Christian König

From: Christian König <ckoenig.leichtzumerken@gmail.com>

Implement finding the right timeline point in drm_syncobj_find_fence.

v2: return -EINVAL when the point is not submitted yet.
v3: fix reference counting bug, add flags handling as well

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_syncobj.c | 43 ++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index f97fa00ca1d0..282982e58dbd 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -231,16 +231,53 @@ int drm_syncobj_find_fence(struct drm_file *file_private,
 			   struct dma_fence **fence)
 {
 	struct drm_syncobj *syncobj = drm_syncobj_find(file_private, handle);
-	int ret = 0;
+	struct syncobj_wait_entry wait;
+	int ret;
 
 	if (!syncobj)
 		return -ENOENT;
 
 	*fence = drm_syncobj_fence_get(syncobj);
-	if (!*fence) {
+	drm_syncobj_put(syncobj);
+
+	if (*fence) {
+		ret = dma_fence_chain_find_seqno(fence, point);
+		if (!ret)
+			return 0;
+		dma_fence_put(*fence);
+	} else {
 		ret = -EINVAL;
 	}
-	drm_syncobj_put(syncobj);
+
+	if (!(flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT))
+		return ret;
+
+	memset(&wait, 0, sizeof(wait));
+	wait.task = current;
+	wait.point = point;
+	drm_syncobj_fence_add_wait(syncobj, &wait);
+
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (wait.fence) {
+			ret = 0;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		schedule();
+	} while (1);
+
+	__set_current_state(TASK_RUNNING);
+	*fence = wait.fence;
+
+	if (wait.node.next)
+		drm_syncobj_remove_wait(syncobj, &wait);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_syncobj_find_fence);
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 08/11] drm/amdgpu: add timeline support in amdgpu CS v2
  2018-12-07  9:55 [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2 Chunming Zhou
                   ` (2 preceding siblings ...)
  2018-12-07  9:55 ` [PATCH 05/11] drm/syncobj: add support for timeline point wait v8 Chunming Zhou
@ 2018-12-07  9:55 ` Chunming Zhou
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  2018-12-07  9:56 ` [PATCH 10/11] drm/syncobj: add timeline signal ioctl for syncobj Chunming Zhou
  5 siblings, 0 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig, dri-devel, amd-gfx
  Cc: Daniel Rakos, Jason Ekstrand, Dave Airlie, Christian König

Syncobj timeline wait/signal operations are appended to the command submission.
v2: separate into two kinds of in/out_deps functions

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
Cc: Daniel Rakos <Daniel.Rakos@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 147 +++++++++++++++++++++----
 include/uapi/drm/amdgpu_drm.h          |   8 ++
 3 files changed, 140 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 42f882c633ee..f9160ea1396a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -545,6 +545,12 @@ struct amdgpu_cs_chunk {
 	void			*kdata;
 };
 
+struct amdgpu_cs_post_dep {
+	struct drm_syncobj *syncobj;
+	struct dma_fence_chain *chain;
+	u64 point;
+};
+
 struct amdgpu_cs_parser {
 	struct amdgpu_device	*adev;
 	struct drm_file		*filp;
@@ -574,8 +580,8 @@ struct amdgpu_cs_parser {
 	/* user fence */
 	struct amdgpu_bo_list_entry	uf_entry;
 
-	unsigned num_post_dep_syncobjs;
-	struct drm_syncobj **post_dep_syncobjs;
+	unsigned			num_post_deps;
+	struct amdgpu_cs_post_dep	*post_deps;
 };
 
 static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc54e9efd910..580f1ea27157 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -213,6 +213,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
 			break;
 
 		default:
@@ -792,9 +794,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
 
-	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
-		drm_syncobj_put(parser->post_dep_syncobjs[i]);
-	kfree(parser->post_dep_syncobjs);
+	for (i = 0; i < parser->num_post_deps; i++) {
+		drm_syncobj_put(parser->post_deps[i].syncobj);
+		kfree(parser->post_deps[i].chain);
+	}
+	kfree(parser->post_deps);
 
 	dma_fence_put(parser->fence);
 
@@ -1100,13 +1104,18 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 }
 
 static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
-						 uint32_t handle)
+						 uint32_t handle, u64 point,
+						 u64 flags)
 {
-	int r;
 	struct dma_fence *fence;
-	r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence);
-	if (r)
+	int r;
+
+	r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+	if (r) {
+		DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+			  handle, point, r);
 		return r;
+	}
 
 	r = amdgpu_sync_fence(p->adev, &p->job->sync, fence, true);
 	dma_fence_put(fence);
@@ -1117,46 +1126,115 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
 static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
 					    struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i, r;
-	struct drm_amdgpu_cs_chunk_sem *deps;
 
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+							  0, 0);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
 
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
+						     struct amdgpu_cs_chunk *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i, r;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
 	for (i = 0; i < num_deps; ++i) {
-		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle);
+		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
+							  syncobj_deps[i].handle,
+							  syncobj_deps[i].point,
+							  syncobj_deps[i].flags);
 		if (r)
 			return r;
 	}
+
 	return 0;
 }
 
 static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
 					     struct amdgpu_cs_chunk *chunk)
 {
+	struct drm_amdgpu_cs_chunk_sem *deps;
 	unsigned num_deps;
 	int i;
-	struct drm_amdgpu_cs_chunk_sem *deps;
+
 	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
 	num_deps = chunk->length_dw * 4 /
 		sizeof(struct drm_amdgpu_cs_chunk_sem);
 
-	p->post_dep_syncobjs = kmalloc_array(num_deps,
-					     sizeof(struct drm_syncobj *),
-					     GFP_KERNEL);
-	p->num_post_dep_syncobjs = 0;
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
+
+	if (!p->post_deps)
+		return -ENOMEM;
+
+
+	for (i = 0; i < num_deps; ++i) {
+		p->post_deps[i].syncobj =
+			drm_syncobj_find(p->filp, deps[i].handle);
+		if (!p->post_deps[i].syncobj)
+			return -EINVAL;
+		p->post_deps[i].chain = NULL;
+		p->post_deps[i].point = 0;
+		p->num_post_deps++;
+	}
+
+	return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
+						      struct amdgpu_cs_chunk
+						      *chunk)
+{
+	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+	unsigned num_deps;
+	int i;
+
+	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+	p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+				     GFP_KERNEL);
+	p->num_post_deps = 0;
 
-	if (!p->post_dep_syncobjs)
+	if (!p->post_deps)
 		return -ENOMEM;
 
 	for (i = 0; i < num_deps; ++i) {
-		p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle);
-		if (!p->post_dep_syncobjs[i])
+		struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+		dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+		if (!dep->chain)
+			return -ENOMEM;
+
+		dep->syncobj = drm_syncobj_find(p->filp,
+						syncobj_deps[i].handle);
+		if (!dep->syncobj) {
+			kfree(dep->chain);
 			return -EINVAL;
-		p->num_post_dep_syncobjs++;
+		}
+		dep->point = syncobj_deps[i].point;
+		p->num_post_deps++;
 	}
+
 	return 0;
 }
 
@@ -1170,18 +1248,32 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 		chunk = &p->chunks[i];
 
-		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+		switch (chunk->chunk_id) {
+		case AMDGPU_CHUNK_ID_DEPENDENCIES:
 			r = amdgpu_cs_process_fence_dep(p, chunk);
 			if (r)
 				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
 			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
 			if (r)
 				return r;
-		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) {
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
 			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
 			if (r)
 				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+			if (r)
+				return r;
+			break;
+		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+			if (r)
+				return r;
+			break;
 		}
 	}
 
@@ -1192,8 +1284,17 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
 {
 	int i;
 
-	for (i = 0; i < p->num_post_dep_syncobjs; ++i)
-		drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence);
+	for (i = 0; i < p->num_post_deps; ++i) {
+		if (p->post_deps[i].chain) {
+			drm_syncobj_add_point(p->post_deps[i].syncobj,
+					      p->post_deps[i].chain,
+					      p->fence, p->post_deps[i].point);
+			p->post_deps[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(p->post_deps[i].syncobj,
+						  p->fence);
+		}
+	}
 }
 
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index be84e43c1e19..997222bc1afe 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -523,6 +523,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
 #define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT    0x07
+#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x08
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -598,6 +600,12 @@ struct drm_amdgpu_cs_chunk_sem {
 	__u32 handle;
 };
 
+struct drm_amdgpu_cs_chunk_syncobj {
+       __u32 handle;
+       __u32 flags;
+       __u64 point;
+};
+
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ	0
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD	1
 #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD	2
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-12-07  9:55   ` [PATCH 07/11] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3 Chunming Zhou
@ 2018-12-07  9:55   ` Chunming Zhou
  2018-12-07 11:28     ` Koenig, Christian
       [not found]     ` <20181207095601.2058-9-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  2018-12-07  9:56   ` [PATCH 11/11] drm/amdgpu: update version for timeline syncobj support in amdgpu Chunming Zhou
  4 siblings, 2 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:55 UTC (permalink / raw)
  To: Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Chunming Zhou

We need ioctls to import a binary syncobj's fence into a timeline point and to export a timeline point into a binary syncobj.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/drm_internal.h |  4 +++
 drivers/gpu/drm/drm_ioctl.c    |  6 ++++
 drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         | 10 ++++++
 4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dab4d5936441..ecbe3d51a702 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
 int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_private);
+int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+					 struct drm_file *file_private);
+int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+					 struct drm_file *file_private);
 int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_private);
 int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7578ef6dc1d1..6b417e3c3ea5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
+		      drm_syncobj_binary_to_timeline_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
+		      drm_syncobj_timeline_to_binary_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 282982e58dbd..cf4daa670252 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 					&args->handle);
 }
 
+/* Add a binary syncobj's fence to a timeline syncobj at args->point.
+ * Returns 0 or a negative errno (-ENOMEM if the chain allocation fails). */
+int
+drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_private)
+{
+	struct drm_syncobj_transfer *args = data;
+	struct drm_syncobj *timeline_syncobj;
+	struct dma_fence_chain *chain;
+	struct dma_fence *fence;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+	if (args->pad)
+		return -EINVAL;
+	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
+	if (!timeline_syncobj)
+		return -ENOENT;
+	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
+				     &fence);
+	if (ret)
+		goto err;
+	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
+	if (!chain) {
+		ret = -ENOMEM;	/* ret is 0 after the lookup; must not report success */
+		goto err1;
+	}
+	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
+err1:
+	dma_fence_put(fence);
+err:
+	drm_syncobj_put(timeline_syncobj);
+	return ret;
+}
+
+/* Copy the fence found for a timeline point into a binary syncobj. */
+int
+drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_private)
+{
+	struct drm_syncobj_transfer *xfer = data;
+	struct drm_syncobj *dst;
+	struct dma_fence *src_fence;
+	int err;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -ENODEV;
+	if (xfer->pad)
+		return -EINVAL;
+
+	dst = drm_syncobj_find(file_private, xfer->binary_handle);
+	if (!dst)
+		return -ENOENT;
+
+	/* A lookup error is returned as-is; the binary fence is not replaced. */
+	err = drm_syncobj_find_fence(file_private, xfer->timeline_handle,
+				     xfer->point, xfer->flags, &src_fence);
+	if (!err) {
+		drm_syncobj_replace_fence(dst, src_fence);
+		dma_fence_put(src_fence);
+	}
+	drm_syncobj_put(dst);
+	return err;
+}
+
 static void syncobj_wait_fence_func(struct dma_fence *fence,
 				    struct dma_fence_cb *cb)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b2c36f2b2599..88d6129d4a18 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,14 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+struct drm_syncobj_transfer {
+	__u32 binary_handle;	/* handle of the binary syncobj */
+	__u32 timeline_handle;	/* handle of the timeline syncobj */
+	__u64 point;		/* point to add (BINARY_TO_TIMELINE) or to look up (TIMELINE_TO_BINARY) */
+	__u32 flags;		/* passed to drm_syncobj_find_fence() by TIMELINE_TO_BINARY; not read by BINARY_TO_TIMELINE */
+	__u32 pad;		/* must be 0, otherwise both ioctls return -EINVAL */
+};
+
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
 #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
@@ -933,6 +941,8 @@ extern "C" {
 
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
 
 /**
  * Device specific ioctls should only be in their respective headers
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 10/11] drm/syncobj: add timeline signal ioctl for syncobj
  2018-12-07  9:55 [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2 Chunming Zhou
                   ` (4 preceding siblings ...)
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-07  9:56 ` Chunming Zhou
       [not found]   ` <20181207095601.2058-10-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  5 siblings, 1 reply; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:56 UTC (permalink / raw)
  To: Christian.Koenig, dri-devel, amd-gfx

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/drm_internal.h |  2 +
 drivers/gpu/drm/drm_ioctl.c    |  2 +
 drivers/gpu/drm/drm_syncobj.c  | 70 ++++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         |  1 +
 4 files changed, 75 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index ecbe3d51a702..149c2f589ec9 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -188,6 +188,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 6b417e3c3ea5..d05586601eb5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -687,6 +687,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index cf4daa670252..238ed89593a7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1165,6 +1165,103 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+/*
+ * Signal syncobjs from userspace. A handle whose entry in args->points is
+ * non-zero gets the stub fence from dma_fence_get_stub() added at that point;
+ * a handle with point 0 (every handle when args->points is NULL) is passed to
+ * drm_syncobj_assign_null_handle() instead.
+ *
+ * Returns 0 on success or a negative errno: -EOPNOTSUPP, -EINVAL, -EFAULT,
+ * -ENOMEM, or the error from the handle lookup.
+ */
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	struct dma_fence_chain **chains;
+	uint64_t *points;
+	uint32_t i;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	points = kmalloc_array(args->count_handles, sizeof(*points),
+			       GFP_KERNEL);
+	if (!points) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!u64_to_user_ptr(args->points)) {
+		memset(points, 0, args->count_handles * sizeof(uint64_t));
+	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
+				  sizeof(uint64_t) * args->count_handles)) {
+		ret = -EFAULT;
+		goto err_points;
+	}
+
+	/*
+	 * Chain nodes are allocated one by one rather than as one array;
+	 * presumably each node is released individually once attached -
+	 * confirm. One zeroed slot per handle; point-0 slots stay NULL.
+	 */
+	chains = kcalloc(args->count_handles, sizeof(*chains), GFP_KERNEL);
+	if (!chains) {
+		ret = -ENOMEM;
+		goto err_points;
+	}
+	/* Allocate all nodes first so -ENOMEM is hit before any syncobj changes. */
+	for (i = 0; i < args->count_handles; i++) {
+		if (!points[i])
+			continue;
+		chains[i] = kzalloc(sizeof(*chains[i]), GFP_KERNEL);
+		if (!chains[i]) {
+			ret = -ENOMEM;
+			goto err_chains;
+		}
+	}
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence *fence;
+
+		if (!points[i]) {
+			drm_syncobj_assign_null_handle(syncobjs[i]);
+			continue;
+		}
+		fence = dma_fence_get_stub();
+		drm_syncobj_add_point(syncobjs[i], chains[i],
+				      fence, points[i]);
+		/* Presumably owned by the syncobj now; keep the cleanup off it. */
+		chains[i] = NULL;
+		dma_fence_put(fence);
+	}
+err_chains:
+	for (i = 0; i < args->count_handles; i++)
+		kfree(chains[i]);
+	kfree(chains);
+err_points:
+	kfree(points);
+out:
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 88d6129d4a18..9a5fa3c26f22 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -943,6 +943,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCE, struct drm_syncobj_timeline_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 11/11] drm/amdgpu: update version for timeline syncobj support in amdgpu
       [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2018-12-07  9:55   ` [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline Chunming Zhou
@ 2018-12-07  9:56   ` Chunming Zhou
  4 siblings, 0 replies; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07  9:56 UTC (permalink / raw)
  To: Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Chunming Zhou

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8de55f7f1a3a..cafafdb1d03f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,10 @@
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add syncobj timeline support to AMDGPU_CS.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
  2018-12-07  9:55   ` [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline Chunming Zhou
@ 2018-12-07 11:28     ` Koenig, Christian
       [not found]     ` <20181207095601.2058-9-david1.zhou-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 0 replies; 42+ messages in thread
From: Koenig, Christian @ 2018-12-07 11:28 UTC (permalink / raw)
  To: Zhou, David(ChunMing), dri-devel, amd-gfx

I would rather use a single ioctl for this and use point 0 to distinguish
whether we should add a new timeline point or replace the binary fence.

Christian.

Am 07.12.18 um 10:55 schrieb Chunming Zhou:
> we need to import/export timeline point
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/drm_internal.h |  4 +++
>   drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>   drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>   include/uapi/drm/drm.h         | 10 ++++++
>   4 files changed, 86 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index dab4d5936441..ecbe3d51a702 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>   				   struct drm_file *file_private);
>   int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>   				   struct drm_file *file_private);
> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
> +					 struct drm_file *file_private);
> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
> +					 struct drm_file *file_private);
>   int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   			   struct drm_file *file_private);
>   int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index 7578ef6dc1d1..6b417e3c3ea5 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
> +		      drm_syncobj_binary_to_timeline_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
> +		      drm_syncobj_timeline_to_binary_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 282982e58dbd..cf4daa670252 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>   					&args->handle);
>   }
>   
> +int
> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
> +				     struct drm_file *file_private)
> +{
> +	struct drm_syncobj_transfer *args = data;
> +	struct drm_syncobj *timeline_syncobj = NULL;
> +	struct dma_fence *fence;
> +	struct dma_fence_chain *chain;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -ENODEV;
> +
> +	if (args->pad)
> +		return -EINVAL;
> +
> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
> +	if (!timeline_syncobj) {
> +		return -ENOENT;
> +	}
> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
> +				     &fence);
> +	if (ret)
> +		goto err;
> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +	if (!chain)
> +		goto err1;
> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
> +err1:
> +	dma_fence_put(fence);
> +err:
> +	drm_syncobj_put(timeline_syncobj);
> +
> +	return ret;
> +}
> +
> +int
> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
> +				     struct drm_file *file_private)
> +{
> +	struct drm_syncobj_transfer *args = data;
> +	struct drm_syncobj *binary_syncobj = NULL;
> +	struct dma_fence *fence;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -ENODEV;
> +
> +	if (args->pad)
> +		return -EINVAL;
> +
> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
> +	if (!binary_syncobj)
> +		return -ENOENT;
> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
> +				     args->point, args->flags, &fence);
> +	if (ret)
> +		goto err;
> +	drm_syncobj_replace_fence(binary_syncobj, fence);
> +	dma_fence_put(fence);
> +err:
> +	drm_syncobj_put(binary_syncobj);
> +
> +	return ret;
> +}
> +
>   static void syncobj_wait_fence_func(struct dma_fence *fence,
>   				    struct dma_fence_cb *cb)
>   {
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index b2c36f2b2599..88d6129d4a18 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>   	__u32 pad;
>   };
>   
> +struct drm_syncobj_transfer {
> +	__u32 binary_handle;
> +	__u32 timeline_handle;
> +	__u64 point;
> +	__u32 flags;
> +	__u32 pad;
> +};
> +
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
> @@ -933,6 +941,8 @@ extern "C" {
>   
>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>   #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>   
>   /**
>    * Device specific ioctls should only be in their respective headers

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 10/11] drm/syncobj: add timeline signal ioctl for syncobj
       [not found]   ` <20181207095601.2058-10-david1.zhou-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-07 11:31     ` Christian König
  2018-12-07 13:09       ` Chunming Zhou
  0 siblings, 1 reply; 42+ messages in thread
From: Christian König @ 2018-12-07 11:31 UTC (permalink / raw)
  To: Chunming Zhou, Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 07.12.18 um 10:56 schrieb Chunming Zhou:
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/drm_internal.h |  2 +
>   drivers/gpu/drm/drm_ioctl.c    |  2 +
>   drivers/gpu/drm/drm_syncobj.c  | 70 ++++++++++++++++++++++++++++++++++
>   include/uapi/drm/drm.h         |  1 +
>   4 files changed, 75 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index ecbe3d51a702..149c2f589ec9 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -188,6 +188,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			     struct drm_file *file_private);
> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
> +				      struct drm_file *file_private);
>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index 6b417e3c3ea5..d05586601eb5 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -687,6 +687,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index cf4daa670252..238ed89593a7 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1165,6 +1165,76 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +int
> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
> +				  struct drm_file *file_private)
> +{
> +	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj **syncobjs;
> +	struct dma_fence_chain *chains;
> +	uint64_t *points;
> +	uint32_t i, j, timeline_count = 0;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -EOPNOTSUPP;
> +
> +	if (args->pad != 0)
> +		return -EINVAL;
> +
> +	if (args->count_handles == 0)
> +		return -EINVAL;
> +
> +	ret = drm_syncobj_array_find(file_private,
> +				     u64_to_user_ptr(args->handles),
> +				     args->count_handles,
> +				     &syncobjs);
> +	if (ret < 0)
> +		return ret;
> +
> +	points = kmalloc_array(args->count_handles, sizeof(*points),
> +			       GFP_KERNEL);
> +	if (!points) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	if (!u64_to_user_ptr(args->points)) {
> +		memset(points, 0, args->count_handles * sizeof(uint64_t));
> +	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
> +				  sizeof(uint64_t) * args->count_handles)) {
> +		ret = -EFAULT;
> +		goto err_points;
> +	}
> +
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		if (points[i])
> +			timeline_count++;
> +	}
> +	chains = kmalloc_array(timeline_count, sizeof(*chains), GFP_KERNEL);

I don't think that this will work. We need individually allocated chain 
objects.

Apart from that this looks good to me,
Christian.

> +	if (!chains) {
> +		ret = -ENOMEM;
> +		goto err_points;
> +	}
> +
> +	for (i = 0, j = 0; i < args->count_handles; i++) {
> +		if (points[i]) {
> +			struct dma_fence *fence = dma_fence_get_stub();
> +
> +			drm_syncobj_add_point(syncobjs[i], &chains[j++],
> +					      fence, points[i]);
> +			dma_fence_put(fence);
> +		} else
> +			drm_syncobj_assign_null_handle(syncobjs[i]);
> +	}
> +err_points:
> +	kfree(points);
> +out:
> +	drm_syncobj_array_free(syncobjs, args->count_handles);
> +
> +	return ret;
> +}
> +
>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private)
>   {
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 88d6129d4a18..9a5fa3c26f22 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -943,6 +943,7 @@ extern "C" {
>   #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>   #define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCE, struct drm_syncobj_timeline_array)
>   
>   /**
>    * Device specific ioctls should only be in their respective headers

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 10/11] drm/syncobj: add timeline signal ioctl for syncobj
  2018-12-07 11:31     ` Christian König
@ 2018-12-07 13:09       ` Chunming Zhou
       [not found]         ` <8c34aaf0-13b3-4070-f4ef-076fe1ab3197-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Chunming Zhou @ 2018-12-07 13:09 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing), dri-devel, amd-gfx


在 2018/12/7 19:31, Christian König 写道:
> Am 07.12.18 um 10:56 schrieb Chunming Zhou:
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> ---
>>   drivers/gpu/drm/drm_internal.h |  2 +
>>   drivers/gpu/drm/drm_ioctl.c    |  2 +
>>   drivers/gpu/drm/drm_syncobj.c  | 70 ++++++++++++++++++++++++++++++++++
>>   include/uapi/drm/drm.h         |  1 +
>>   4 files changed, 75 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h 
>> b/drivers/gpu/drm/drm_internal.h
>> index ecbe3d51a702..149c2f589ec9 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -188,6 +188,8 @@ int drm_syncobj_reset_ioctl(struct drm_device 
>> *dev, void *data,
>>                   struct drm_file *file_private);
>>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>                    struct drm_file *file_private);
>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void 
>> *data,
>> +                      struct drm_file *file_private);
>>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>                   struct drm_file *file_private);
>>   diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index 6b417e3c3ea5..d05586601eb5 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -687,6 +687,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, 
>> drm_syncobj_timeline_signal_ioctl,
>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>> b/drivers/gpu/drm/drm_syncobj.c
>> index cf4daa670252..238ed89593a7 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1165,6 +1165,76 @@ drm_syncobj_signal_ioctl(struct drm_device 
>> *dev, void *data,
>>       return ret;
>>   }
>>   +int
>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>> +                  struct drm_file *file_private)
>> +{
>> +    struct drm_syncobj_timeline_array *args = data;
>> +    struct drm_syncobj **syncobjs;
>> +    struct dma_fence_chain *chains;
>> +    uint64_t *points;
>> +    uint32_t i, j, timeline_count = 0;
>> +    int ret;
>> +
>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +        return -EOPNOTSUPP;
>> +
>> +    if (args->pad != 0)
>> +        return -EINVAL;
>> +
>> +    if (args->count_handles == 0)
>> +        return -EINVAL;
>> +
>> +    ret = drm_syncobj_array_find(file_private,
>> +                     u64_to_user_ptr(args->handles),
>> +                     args->count_handles,
>> +                     &syncobjs);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    points = kmalloc_array(args->count_handles, sizeof(*points),
>> +                   GFP_KERNEL);
>> +    if (!points) {
>> +        ret = -ENOMEM;
>> +        goto out;
>> +    }
>> +    if (!u64_to_user_ptr(args->points)) {
>> +        memset(points, 0, args->count_handles * sizeof(uint64_t));
>> +    } else if (copy_from_user(points, u64_to_user_ptr(args->points),
>> +                  sizeof(uint64_t) * args->count_handles)) {
>> +        ret = -EFAULT;
>> +        goto err_points;
>> +    }
>> +
>> +
>> +    for (i = 0; i < args->count_handles; i++) {
>> +        if (points[i])
>> +            timeline_count++;
>> +    }
>> +    chains = kmalloc_array(timeline_count, sizeof(*chains), 
>> GFP_KERNEL);
>
> I don't think that this will work. We need individually allocated 
> chain objects.

Yeah, I shouldn't use &chains[i] to get the address. How about using a cast
like "(struct dma_fence_chain *)chains[i]"?

btw, I will add a test case for signal array in igt.


-David

>
> Apart from that this looks good to me,
> Christian.
>
>> +    if (!chains) {
>> +        ret = -ENOMEM;
>> +        goto err_points;
>> +    }
>> +
>> +    for (i = 0, j = 0; i < args->count_handles; i++) {
>> +        if (points[i]) {
>> +            struct dma_fence *fence = dma_fence_get_stub();
>> +
>> +            drm_syncobj_add_point(syncobjs[i], &chains[j++],
>> +                          fence, points[i]);
>> +            dma_fence_put(fence);
>> +        } else
>> +            drm_syncobj_assign_null_handle(syncobjs[i]);
>> +    }
>> +err_points:
>> +    kfree(points);
>> +out:
>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>> +
>> +    return ret;
>> +}
>> +
>>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>                   struct drm_file *file_private)
>>   {
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 88d6129d4a18..9a5fa3c26f22 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -943,6 +943,7 @@ extern "C" {
>>   #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct 
>> drm_syncobj_timeline_array)
>>   #define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE    DRM_IOWR(0xCC, 
>> struct drm_syncobj_transfer)
>>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY    DRM_IOWR(0xCD, 
>> struct drm_syncobj_transfer)
>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCE, struct 
>> drm_syncobj_timeline_array)
>>     /**
>>    * Device specific ioctls should only be in their respective headers
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 10/11] drm/syncobj: add timeline signal ioctl for syncobj
       [not found]         ` <8c34aaf0-13b3-4070-f4ef-076fe1ab3197-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-07 13:14           ` Koenig, Christian
  0 siblings, 0 replies; 42+ messages in thread
From: Koenig, Christian @ 2018-12-07 13:14 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 07.12.18 um 14:09 schrieb Zhou, David(ChunMing):
> 在 2018/12/7 19:31, Christian König 写道:
>> Am 07.12.18 um 10:56 schrieb Chunming Zhou:
>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>> ---
>>>    drivers/gpu/drm/drm_internal.h |  2 +
>>>    drivers/gpu/drm/drm_ioctl.c    |  2 +
>>>    drivers/gpu/drm/drm_syncobj.c  | 70 ++++++++++++++++++++++++++++++++++
>>>    include/uapi/drm/drm.h         |  1 +
>>>    4 files changed, 75 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>> b/drivers/gpu/drm/drm_internal.h
>>> index ecbe3d51a702..149c2f589ec9 100644
>>> --- a/drivers/gpu/drm/drm_internal.h
>>> +++ b/drivers/gpu/drm/drm_internal.h
>>> @@ -188,6 +188,8 @@ int drm_syncobj_reset_ioctl(struct drm_device
>>> *dev, void *data,
>>>                    struct drm_file *file_private);
>>>    int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>>                     struct drm_file *file_private);
>>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void
>>> *data,
>>> +                      struct drm_file *file_private);
>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>                    struct drm_file *file_private);
>>>    diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>> index 6b417e3c3ea5..d05586601eb5 100644
>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>> @@ -687,6 +687,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL,
>>> drm_syncobj_timeline_signal_ioctl,
>>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index cf4daa670252..238ed89593a7 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -1165,6 +1165,76 @@ drm_syncobj_signal_ioctl(struct drm_device
>>> *dev, void *data,
>>>        return ret;
>>>    }
>>>    +int
>>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>>> +                  struct drm_file *file_private)
>>> +{
>>> +    struct drm_syncobj_timeline_array *args = data;
>>> +    struct drm_syncobj **syncobjs;
>>> +    struct dma_fence_chain *chains;
>>> +    uint64_t *points;
>>> +    uint32_t i, j, timeline_count = 0;
>>> +    int ret;
>>> +
>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    if (args->pad != 0)
>>> +        return -EINVAL;
>>> +
>>> +    if (args->count_handles == 0)
>>> +        return -EINVAL;
>>> +
>>> +    ret = drm_syncobj_array_find(file_private,
>>> +                     u64_to_user_ptr(args->handles),
>>> +                     args->count_handles,
>>> +                     &syncobjs);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    points = kmalloc_array(args->count_handles, sizeof(*points),
>>> +                   GFP_KERNEL);
>>> +    if (!points) {
>>> +        ret = -ENOMEM;
>>> +        goto out;
>>> +    }
>>> +    if (!u64_to_user_ptr(args->points)) {
>>> +        memset(points, 0, args->count_handles * sizeof(uint64_t));
>>> +    } else if (copy_from_user(points, u64_to_user_ptr(args->points),
>>> +                  sizeof(uint64_t) * args->count_handles)) {
>>> +        ret = -EFAULT;
>>> +        goto err_points;
>>> +    }
>>> +
>>> +
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        if (points[i])
>>> +            timeline_count++;
>>> +    }
>>> +    chains = kmalloc_array(timeline_count, sizeof(*chains),
>>> GFP_KERNEL);
>> I don't think that this will work. We need individually allocated
>> chain objects.
> Yeah, I shouldn't use &chains[i] to get address, how about using cast
> like "(struct dma_fence_chain *)chains[i]".

No, that still won't work. You need an array of individual objects here, 
not a single array with many entries.

E.g. something like this:

struct dma_fence_chain **chains = kmalloc_array(count, sizeof(void*));

for (i = 0; i < count; ++i)
     chains[i] = kmalloc(....);

Christian.

>
> btw, I will add a test case for signal array in igt.
>
>
> -David
>
>> Apart from that this looks good to me,
>> Christian.
>>
>>> +    if (!chains) {
>>> +        ret = -ENOMEM;
>>> +        goto err_points;
>>> +    }
>>> +
>>> +    for (i = 0, j = 0; i < args->count_handles; i++) {
>>> +        if (points[i]) {
>>> +            struct dma_fence *fence = dma_fence_get_stub();
>>> +
>>> +            drm_syncobj_add_point(syncobjs[i], &chains[j++],
>>> +                          fence, points[i]);
>>> +            dma_fence_put(fence);
>>> +        } else
>>> +            drm_syncobj_assign_null_handle(syncobjs[i]);
>>> +    }
>>> +err_points:
>>> +    kfree(points);
>>> +out:
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>                    struct drm_file *file_private)
>>>    {
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index 88d6129d4a18..9a5fa3c26f22 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -943,6 +943,7 @@ extern "C" {
>>>    #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>> drm_syncobj_timeline_array)
>>>    #define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE    DRM_IOWR(0xCC,
>>> struct drm_syncobj_transfer)
>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY    DRM_IOWR(0xCD,
>>> struct drm_syncobj_transfer)
>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCE, struct
>>> drm_syncobj_timeline_array)
>>>      /**
>>>     * Device specific ioctls should only be in their respective headers

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
  2018-12-07  9:55 ` [PATCH 02/11] dma-buf: add new dma_fence_chain container v4 Chunming Zhou
@ 2019-02-15 14:23   ` Lionel Landwerlin via dri-devel
       [not found]     ` <6c2adaf5-6871-20be-a26d-182f8ca8ab8a-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Lionel Landwerlin via dri-devel @ 2019-02-15 14:23 UTC (permalink / raw)
  To: Chunming Zhou, Christian.Koenig, dri-devel, amd-gfx; +Cc: Christian König

Hi Christian, David,

For timeline semaphores we need points to be signaled in order.
I'm struggling to understand how this fence-chain implementation 
preserves ordering of the seqnos.

One of the scenarios where I can see an issue happening is when you have a 
timeline with points 1 & 2 and userspace submits for 2 different engines:
     - first with let's say a blitter style engine on point 2
     - then a 3d style engine on point 1

Another scenario would be signaling a timeline with points 1 & 2 with 
those points in reverse order in the submission array.

-Lionel

On 07/12/2018 09:55, Chunming Zhou wrote:
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>
> Lockless container implementation similar to a dma_fence_array, but with
> only two elements per node and automatic garbage collection.
>
> v2: properly document dma_fence_chain_for_each, add dma_fence_chain_find_seqno,
>      drop prev reference during garbage collection if it's not a chain fence.
> v3: use head and iterator for dma_fence_chain_for_each
> v4: fix reference count in dma_fence_chain_enable_signaling
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/dma-buf/Makefile          |   3 +-
>   drivers/dma-buf/dma-fence-chain.c | 241 ++++++++++++++++++++++++++++++
>   include/linux/dma-fence-chain.h   |  81 ++++++++++
>   3 files changed, 324 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/dma-buf/dma-fence-chain.c
>   create mode 100644 include/linux/dma-fence-chain.h
>
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index 0913a6ccab5a..1f006e083eb9 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,4 +1,5 @@
> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
> +	 reservation.o seqno-fence.o
>   obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
>   obj-$(CONFIG_SW_SYNC)		+= sw_sync.o sync_debug.o
>   obj-$(CONFIG_UDMABUF)		+= udmabuf.o
> diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
> new file mode 100644
> index 000000000000..0c5e3c902fa0
> --- /dev/null
> +++ b/drivers/dma-buf/dma-fence-chain.c
> @@ -0,0 +1,241 @@
> +/*
> + * fence-chain: chain fences together in a timeline
> + *
> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
> + * Authors:
> + *	Christian König <christian.koenig@amd.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#include <linux/dma-fence-chain.h>
> +
> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
> +
> +/**
> + * dma_fence_chain_get_prev - use RCU to get a reference to the previous fence
> + * @chain: chain node to get the previous node from
> + *
> + * Use dma_fence_get_rcu_safe to get a reference to the previous fence of the
> + * chain node.
> + */
> +static struct dma_fence *dma_fence_chain_get_prev(struct dma_fence_chain *chain)
> +{
> +	struct dma_fence *prev;
> +
> +	rcu_read_lock();
> +	prev = dma_fence_get_rcu_safe(&chain->prev);
> +	rcu_read_unlock();
> +	return prev;
> +}
> +
> +/**
> + * dma_fence_chain_walk - chain walking function
> + * @fence: current chain node
> + *
> + * Walk the chain to the next node. Returns the next fence or NULL if we are at
> + * the end of the chain. Garbage collects chain nodes which are already
> + * signaled.
> + */
> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
> +{
> +	struct dma_fence_chain *chain, *prev_chain;
> +	struct dma_fence *prev, *replacement, *tmp;
> +
> +	chain = to_dma_fence_chain(fence);
> +	if (!chain) {
> +		dma_fence_put(fence);
> +		return NULL;
> +	}
> +
> +	while ((prev = dma_fence_chain_get_prev(chain))) {
> +
> +		prev_chain = to_dma_fence_chain(prev);
> +		if (prev_chain) {
> +			if (!dma_fence_is_signaled(prev_chain->fence))
> +				break;
> +
> +			replacement = dma_fence_chain_get_prev(prev_chain);
> +		} else {
> +			if (!dma_fence_is_signaled(prev))
> +				break;
> +
> +			replacement = NULL;
> +		}
> +
> +		tmp = cmpxchg(&chain->prev, prev, replacement);
> +		if (tmp == prev)
> +			dma_fence_put(tmp);
> +		else
> +			dma_fence_put(replacement);
> +		dma_fence_put(prev);
> +	}
> +
> +	dma_fence_put(fence);
> +	return prev;
> +}
> +EXPORT_SYMBOL(dma_fence_chain_walk);
> +
> +/**
> + * dma_fence_chain_find_seqno - find fence chain node by seqno
> + * @pfence: pointer to the chain node where to start
> + * @seqno: the sequence number to search for
> + *
> + * Advance the fence pointer to the chain node which will signal this sequence
> + * number. If no sequence number is provided then this is a no-op.
> + *
> + * Returns EINVAL if the fence is not a chain node or the sequence number has
> + * not yet advanced far enough.
> + */
> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno)
> +{
> +	struct dma_fence_chain *chain;
> +
> +	if (!seqno)
> +		return 0;
> +
> +	chain = to_dma_fence_chain(*pfence);
> +	if (!chain || chain->base.seqno < seqno)
> +		return -EINVAL;
> +
> +	dma_fence_chain_for_each(*pfence, &chain->base) {
> +		if ((*pfence)->context != chain->base.context ||
> +		    to_dma_fence_chain(*pfence)->prev_seqno < seqno)
> +			break;
> +	}
> +	dma_fence_put(&chain->base);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
> +
> +static const char *dma_fence_chain_get_driver_name(struct dma_fence *fence)
> +{
> +        return "dma_fence_chain";
> +}
> +
> +static const char *dma_fence_chain_get_timeline_name(struct dma_fence *fence)
> +{
> +        return "unbound";
> +}
> +
> +static void dma_fence_chain_irq_work(struct irq_work *work)
> +{
> +	struct dma_fence_chain *chain;
> +
> +	chain = container_of(work, typeof(*chain), work);
> +
> +	/* Try to rearm the callback */
> +	if (!dma_fence_chain_enable_signaling(&chain->base))
> +		/* Ok, we are done. No more unsignaled fences left */
> +		dma_fence_signal(&chain->base);
> +	dma_fence_put(&chain->base);
> +}
> +
> +static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> +{
> +	struct dma_fence_chain *chain;
> +
> +	chain = container_of(cb, typeof(*chain), cb);
> +	irq_work_queue(&chain->work);
> +	dma_fence_put(f);
> +}
> +
> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
> +{
> +	struct dma_fence_chain *head = to_dma_fence_chain(fence);
> +
> +	dma_fence_get(&head->base);
> +	dma_fence_chain_for_each(fence, &head->base) {
> +		struct dma_fence_chain *chain = to_dma_fence_chain(fence);
> +		struct dma_fence *f = chain ? chain->fence : fence;
> +
> +		dma_fence_get(f);
> +		if (!dma_fence_add_callback(f, &head->cb, dma_fence_chain_cb)) {
> +			dma_fence_put(fence);
> +			return true;
> +		}
> +		dma_fence_put(f);
> +	}
> +	dma_fence_put(&head->base);
> +	return false;
> +}
> +
> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
> +{
> +	dma_fence_chain_for_each(fence, fence) {
> +		struct dma_fence_chain *chain = to_dma_fence_chain(fence);
> +		struct dma_fence *f = chain ? chain->fence : fence;
> +
> +		if (!dma_fence_is_signaled(f)) {
> +			dma_fence_put(fence);
> +			return false;
> +		}
> +	}
> +
> +	return true;
> +}
> +
> +static void dma_fence_chain_release(struct dma_fence *fence)
> +{
> +	struct dma_fence_chain *chain = to_dma_fence_chain(fence);
> +
> +	dma_fence_put(chain->prev);
> +	dma_fence_put(chain->fence);
> +	dma_fence_free(fence);
> +}
> +
> +const struct dma_fence_ops dma_fence_chain_ops = {
> +	.get_driver_name = dma_fence_chain_get_driver_name,
> +	.get_timeline_name = dma_fence_chain_get_timeline_name,
> +	.enable_signaling = dma_fence_chain_enable_signaling,
> +	.signaled = dma_fence_chain_signaled,
> +	.release = dma_fence_chain_release,
> +};
> +EXPORT_SYMBOL(dma_fence_chain_ops);
> +
> +/**
> + * dma_fence_chain_init - initialize a fence chain
> + * @chain: the chain node to initialize
> + * @prev: the previous fence
> + * @fence: the current fence
> + *
> + * Initialize a new chain node and either start a new chain or add the node to
> + * the existing chain of the previous fence.
> + */
> +void dma_fence_chain_init(struct dma_fence_chain *chain,
> +			  struct dma_fence *prev,
> +			  struct dma_fence *fence,
> +			  uint64_t seqno)
> +{
> +	struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
> +	uint64_t context;
> +
> +	spin_lock_init(&chain->lock);
> +	chain->prev = prev;
> +	chain->fence = fence;
> +	chain->prev_seqno = 0;
> +	init_irq_work(&chain->work, dma_fence_chain_irq_work);
> +
> +	/* Try to reuse the context of the previous chain node. */
> +	if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
> +		context = prev->context;
> +		chain->prev_seqno = prev->seqno;
> +	} else {
> +		context = dma_fence_context_alloc(1);
> +		/* Make sure that we always have a valid sequence number. */
> +		if (prev_chain)
> +			seqno = max(prev->seqno, seqno);
> +	}
> +
> +	dma_fence_init(&chain->base, &dma_fence_chain_ops,
> +		       &chain->lock, context, seqno);
> +}
> +EXPORT_SYMBOL(dma_fence_chain_init);
> diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
> new file mode 100644
> index 000000000000..a5c2e8c6915c
> --- /dev/null
> +++ b/include/linux/dma-fence-chain.h
> @@ -0,0 +1,81 @@
> +/*
> + * fence-chain: chain fences together in a timeline
> + *
> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
> + * Authors:
> + *	Christian König <christian.koenig@amd.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + */
> +
> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
> +#define __LINUX_DMA_FENCE_CHAIN_H
> +
> +#include <linux/dma-fence.h>
> +#include <linux/irq_work.h>
> +
> +/**
> + * struct dma_fence_chain - fence to represent an node of a fence chain
> + * @base: fence base class
> + * @lock: spinlock for fence handling
> + * @prev: previous fence of the chain
> + * @prev_seqno: original previous seqno before garbage collection
> + * @fence: encapsulated fence
> + * @cb: callback structure for signaling
> + * @work: irq work item for signaling
> + */
> +struct dma_fence_chain {
> +	struct dma_fence base;
> +	spinlock_t lock;
> +	struct dma_fence *prev;
> +	u64 prev_seqno;
> +	struct dma_fence *fence;
> +	struct dma_fence_cb cb;
> +	struct irq_work work;
> +};
> +
> +extern const struct dma_fence_ops dma_fence_chain_ops;
> +
> +/**
> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
> + * @fence: fence to cast to a dma_fence_array
> + *
> + * Returns NULL if the fence is not a dma_fence_chain,
> + * or the dma_fence_chain otherwise.
> + */
> +static inline struct dma_fence_chain *
> +to_dma_fence_chain(struct dma_fence *fence)
> +{
> +	if (!fence || fence->ops != &dma_fence_chain_ops)
> +		return NULL;
> +
> +	return container_of(fence, struct dma_fence_chain, base);
> +}
> +
> +/**
> + * dma_fence_chain_for_each - iterate over all fences in chain
> + * @iter: current fence
> + * @head: starting point
> + *
> + * Iterate over all fences in the chain. We keep a reference to the current
> + * fence while inside the loop which must be dropped when breaking out.
> + */
> +#define dma_fence_chain_for_each(iter, head)	\
> +	for (iter = dma_fence_get(head); iter; \
> +	     iter = dma_fence_chain_walk(head))
> +
> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno);
> +void dma_fence_chain_init(struct dma_fence_chain *chain,
> +			  struct dma_fence *prev,
> +			  struct dma_fence *fence,
> +			  uint64_t seqno);
> +
> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */


_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]     ` <20181207095601.2058-9-david1.zhou-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-15 14:28       ` Lionel Landwerlin via amd-gfx
  2019-02-18 10:35         ` zhoucm1
  0 siblings, 1 reply; 42+ messages in thread
From: Lionel Landwerlin via amd-gfx @ 2019-02-15 14:28 UTC (permalink / raw)
  To: Chunming Zhou, Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi David,

Thanks a lot for pointing me to the tests you've added in IGT.
While adding a test that signals fences imported into a timeline 
syncobj out of order, I ran into a deadlock.
Here is the test : 
https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9

Trying to kill the deadlocked process I got this backtrace :


[   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
[   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s! 
[syncobj_timelin:2021]
[   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc 
nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek 
snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel 
rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl 
snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event 
libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass 
btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq 
snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me 
mei intel_pch_thermal mac_hid acpi_pad parp
ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress 
zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq 
async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear 
hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit 
ghash_clmulni_intel prime_numbers
drm_kms_helper aesni_intel syscopyarea sysfillrect
[   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci 
cryptd drm e1000e glue_helper cqhci sdhci wmi video
[   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G     
U            5.0.0-rc5+ #337
[   60.452882] Hardware name:  /NUC6i7KYB, BIOS 
KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
[   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
[   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0 
74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f 
00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
[   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX: 
ffffffffffffff13
[   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX: 
ffff8f5690fb2f00
[   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI: 
ffff8f5690fb2180
[   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09: 
ffff8f5690fb2eb0
[   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12: 
ffff8f5690fb2f68
[   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15: 
ffff8f5655a45fc0
[   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000) 
knlGS:0000000000000000
[   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4: 
00000000003606e0
[   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000
[   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 
0000000000000400
[   60.452916] Call Trace:
[   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
[   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
[   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
[   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
[   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452992]  ? sw_sync_ioctl+0x347/0x370
[   60.452994]  do_vfs_ioctl+0xa4/0x640
[   60.452995]  ? __fput+0x134/0x220
[   60.452997]  ? do_fcntl+0x1a5/0x650
[   60.452998]  ksys_ioctl+0x70/0x80
[   60.452999]  __x64_sys_ioctl+0x16/0x20
[   60.453002]  do_syscall_64+0x55/0x110
[   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   60.453005] RIP: 0033:0x7fdc5b6e45d7
[   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00 
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
[   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX: 
0000000000000010
[   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 
00007fdc5b6e45d7
[   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI: 
0000000000000003
[   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09: 
000000000000001e
[   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12: 
0000563d3959e4d0
[   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15: 
0000000000000000
[   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s! 
[syncobj_timelin:2021]


-Lionel


On 07/12/2018 09:55, Chunming Zhou wrote:
> we need to import/export timeline point
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/drm_internal.h |  4 +++
>   drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>   drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>   include/uapi/drm/drm.h         | 10 ++++++
>   4 files changed, 86 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index dab4d5936441..ecbe3d51a702 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>   				   struct drm_file *file_private);
>   int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>   				   struct drm_file *file_private);
> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
> +					 struct drm_file *file_private);
> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
> +					 struct drm_file *file_private);
>   int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>   			   struct drm_file *file_private);
>   int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index 7578ef6dc1d1..6b417e3c3ea5 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
> +		      drm_syncobj_binary_to_timeline_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
> +		      drm_syncobj_timeline_to_binary_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 282982e58dbd..cf4daa670252 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>   					&args->handle);
>   }
>   
> +int
> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
> +				     struct drm_file *file_private)
> +{
> +	struct drm_syncobj_transfer *args = data;
> +	struct drm_syncobj *timeline_syncobj = NULL;
> +	struct dma_fence *fence;
> +	struct dma_fence_chain *chain;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -ENODEV;
> +
> +	if (args->pad)
> +		return -EINVAL;
> +
> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
> +	if (!timeline_syncobj) {
> +		return -ENOENT;
> +	}
> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
> +				     &fence);
> +	if (ret)
> +		goto err;
> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +	if (!chain)
> +		goto err1;
> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
> +err1:
> +	dma_fence_put(fence);
> +err:
> +	drm_syncobj_put(timeline_syncobj);
> +
> +	return ret;
> +}
> +
> +int
> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
> +				     struct drm_file *file_private)
> +{
> +	struct drm_syncobj_transfer *args = data;
> +	struct drm_syncobj *binary_syncobj = NULL;
> +	struct dma_fence *fence;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -ENODEV;
> +
> +	if (args->pad)
> +		return -EINVAL;
> +
> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
> +	if (!binary_syncobj)
> +		return -ENOENT;
> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
> +				     args->point, args->flags, &fence);
> +	if (ret)
> +		goto err;
> +	drm_syncobj_replace_fence(binary_syncobj, fence);
> +	dma_fence_put(fence);
> +err:
> +	drm_syncobj_put(binary_syncobj);
> +
> +	return ret;
> +}
> +
>   static void syncobj_wait_fence_func(struct dma_fence *fence,
>   				    struct dma_fence_cb *cb)
>   {
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index b2c36f2b2599..88d6129d4a18 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>   	__u32 pad;
>   };
>   
> +struct drm_syncobj_transfer {
> +	__u32 binary_handle;
> +	__u32 timeline_handle;
> +	__u64 point;
> +	__u32 flags;
> +	__u32 pad;
> +};
> +
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>   #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
> @@ -933,6 +941,8 @@ extern "C" {
>   
>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>   #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>   
>   /**
>    * Device specific ioctls should only be in their respective headers


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]     ` <6c2adaf5-6871-20be-a26d-182f8ca8ab8a-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2019-02-15 14:32       ` Koenig, Christian
       [not found]         ` <e170ceed-fdb7-8b4a-93d7-e565641390b3-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-15 14:32 UTC (permalink / raw)
  To: Lionel Landwerlin, Zhou, David(ChunMing),
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König

Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
> Hi Christian, David,
>
> For timeline semaphores we need points to be signaled in order.
> I'm struggling to understand how this fence-chain implementation 
> preserves ordering of the seqnos.
>
> One scenario where I can see an issue happening is when you have a 
> timeline with points 1 & 2 and userspace submits for 2 different 
> engines :
>     - first with let's say a blitter style engine on point 2
>     - then a 3d style engine on point 1

Yeah, and where exactly is the problem?

Seqno 1 will signal when the 3d style engine finishes work.

And seqno 2 will signal when both seqno 1 is signaled and the blitter 
style engine has finished its work.

> Another scenario would be signaling a timeline with points 1 & 2 with 
> those points in reverse order in the submission array.

That is actually illegal in the spec, but actually handled gracefully as 
well.

E.g. when you add seqno 1 to the syncobj container it will only signal 
when 2 is signaled as well.

Regards,
Christian.

>
> -Lionel
>
> On 07/12/2018 09:55, Chunming Zhou wrote:
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>
>> Lockless container implementation similar to a dma_fence_array, but with
>> only two elements per node and automatic garbage collection.
>>
>> v2: properly document dma_fence_chain_for_each, add 
>> dma_fence_chain_find_seqno,
>>      drop prev reference during garbage collection if it's not a 
>> chain fence.
>> v3: use head and iterator for dma_fence_chain_for_each
>> v4: fix reference count in dma_fence_chain_enable_signaling
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/dma-buf/Makefile          |   3 +-
>>   drivers/dma-buf/dma-fence-chain.c | 241 ++++++++++++++++++++++++++++++
>>   include/linux/dma-fence-chain.h   |  81 ++++++++++
>>   3 files changed, 324 insertions(+), 1 deletion(-)
>>   create mode 100644 drivers/dma-buf/dma-fence-chain.c
>>   create mode 100644 include/linux/dma-fence-chain.h
>>
>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>> index 0913a6ccab5a..1f006e083eb9 100644
>> --- a/drivers/dma-buf/Makefile
>> +++ b/drivers/dma-buf/Makefile
>> @@ -1,4 +1,5 @@
>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o 
>> seqno-fence.o
>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
>> +     reservation.o seqno-fence.o
>>   obj-$(CONFIG_SYNC_FILE)        += sync_file.o
>>   obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
>>   obj-$(CONFIG_UDMABUF)        += udmabuf.o
>> diff --git a/drivers/dma-buf/dma-fence-chain.c 
>> b/drivers/dma-buf/dma-fence-chain.c
>> new file mode 100644
>> index 000000000000..0c5e3c902fa0
>> --- /dev/null
>> +++ b/drivers/dma-buf/dma-fence-chain.c
>> @@ -0,0 +1,241 @@
>> +/*
>> + * fence-chain: chain fences together in a timeline
>> + *
>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>> + * Authors:
>> + *    Christian König <christian.koenig@amd.com>
>> + *
>> + * This program is free software; you can redistribute it and/or 
>> modify it
>> + * under the terms of the GNU General Public License version 2 as 
>> published by
>> + * the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, 
>> but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
>> License for
>> + * more details.
>> + */
>> +
>> +#include <linux/dma-fence-chain.h>
>> +
>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
>> +
>> +/**
>> + * dma_fence_chain_get_prev - use RCU to get a reference to the 
>> previous fence
>> + * @chain: chain node to get the previous node from
>> + *
>> + * Use dma_fence_get_rcu_safe to get a reference to the previous 
>> fence of the
>> + * chain node.
>> + */
>> +static struct dma_fence *dma_fence_chain_get_prev(struct 
>> dma_fence_chain *chain)
>> +{
>> +    struct dma_fence *prev;
>> +
>> +    rcu_read_lock();
>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
>> +    rcu_read_unlock();
>> +    return prev;
>> +}
>> +
>> +/**
>> + * dma_fence_chain_walk - chain walking function
>> + * @fence: current chain node
>> + *
>> + * Walk the chain to the next node. Returns the next fence or NULL 
>> if we are at
>> + * the end of the chain. Garbage collects chain nodes which are already
>> + * signaled.
>> + */
>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>> +{
>> +    struct dma_fence_chain *chain, *prev_chain;
>> +    struct dma_fence *prev, *replacement, *tmp;
>> +
>> +    chain = to_dma_fence_chain(fence);
>> +    if (!chain) {
>> +        dma_fence_put(fence);
>> +        return NULL;
>> +    }
>> +
>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
>> +
>> +        prev_chain = to_dma_fence_chain(prev);
>> +        if (prev_chain) {
>> +            if (!dma_fence_is_signaled(prev_chain->fence))
>> +                break;
>> +
>> +            replacement = dma_fence_chain_get_prev(prev_chain);
>> +        } else {
>> +            if (!dma_fence_is_signaled(prev))
>> +                break;
>> +
>> +            replacement = NULL;
>> +        }
>> +
>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
>> +        if (tmp == prev)
>> +            dma_fence_put(tmp);
>> +        else
>> +            dma_fence_put(replacement);
>> +        dma_fence_put(prev);
>> +    }
>> +
>> +    dma_fence_put(fence);
>> +    return prev;
>> +}
>> +EXPORT_SYMBOL(dma_fence_chain_walk);
>> +
>> +/**
>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
>> + * @pfence: pointer to the chain node where to start
>> + * @seqno: the sequence number to search for
>> + *
>> + * Advance the fence pointer to the chain node which will signal 
>> this sequence
>> + * number. If no sequence number is provided then this is a no-op.
>> + *
>> + * Returns EINVAL if the fence is not a chain node or the sequence 
>> number has
>> + * not yet advanced far enough.
>> + */
>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t 
>> seqno)
>> +{
>> +    struct dma_fence_chain *chain;
>> +
>> +    if (!seqno)
>> +        return 0;
>> +
>> +    chain = to_dma_fence_chain(*pfence);
>> +    if (!chain || chain->base.seqno < seqno)
>> +        return -EINVAL;
>> +
>> +    dma_fence_chain_for_each(*pfence, &chain->base) {
>> +        if ((*pfence)->context != chain->base.context ||
>> +            to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>> +            break;
>> +    }
>> +    dma_fence_put(&chain->base);
>> +
>> +    return 0;
>> +}
>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
>> +
>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence 
>> *fence)
>> +{
>> +        return "dma_fence_chain";
>> +}
>> +
>> +static const char *dma_fence_chain_get_timeline_name(struct 
>> dma_fence *fence)
>> +{
>> +        return "unbound";
>> +}
>> +
>> +static void dma_fence_chain_irq_work(struct irq_work *work)
>> +{
>> +    struct dma_fence_chain *chain;
>> +
>> +    chain = container_of(work, typeof(*chain), work);
>> +
>> +    /* Try to rearm the callback */
>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
>> +        /* Ok, we are done. No more unsignaled fences left */
>> +        dma_fence_signal(&chain->base);
>> +    dma_fence_put(&chain->base);
>> +}
>> +
>> +static void dma_fence_chain_cb(struct dma_fence *f, struct 
>> dma_fence_cb *cb)
>> +{
>> +    struct dma_fence_chain *chain;
>> +
>> +    chain = container_of(cb, typeof(*chain), cb);
>> +    irq_work_queue(&chain->work);
>> +    dma_fence_put(f);
>> +}
>> +
>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>> +{
>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
>> +
>> +    dma_fence_get(&head->base);
>> +    dma_fence_chain_for_each(fence, &head->base) {
>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>> +        struct dma_fence *f = chain ? chain->fence : fence;
>> +
>> +        dma_fence_get(f);
>> +        if (!dma_fence_add_callback(f, &head->cb, 
>> dma_fence_chain_cb)) {
>> +            dma_fence_put(fence);
>> +            return true;
>> +        }
>> +        dma_fence_put(f);
>> +    }
>> +    dma_fence_put(&head->base);
>> +    return false;
>> +}
>> +
>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
>> +{
>> +    dma_fence_chain_for_each(fence, fence) {
>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>> +        struct dma_fence *f = chain ? chain->fence : fence;
>> +
>> +        if (!dma_fence_is_signaled(f)) {
>> +            dma_fence_put(fence);
>> +            return false;
>> +        }
>> +    }
>> +
>> +    return true;
>> +}
>> +
>> +static void dma_fence_chain_release(struct dma_fence *fence)
>> +{
>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>> +
>> +    dma_fence_put(chain->prev);
>> +    dma_fence_put(chain->fence);
>> +    dma_fence_free(fence);
>> +}
>> +
>> +const struct dma_fence_ops dma_fence_chain_ops = {
>> +    .get_driver_name = dma_fence_chain_get_driver_name,
>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
>> +    .enable_signaling = dma_fence_chain_enable_signaling,
>> +    .signaled = dma_fence_chain_signaled,
>> +    .release = dma_fence_chain_release,
>> +};
>> +EXPORT_SYMBOL(dma_fence_chain_ops);
>> +
>> +/**
>> + * dma_fence_chain_init - initialize a fence chain
>> + * @chain: the chain node to initialize
>> + * @prev: the previous fence
>> + * @fence: the current fence
>> + *
>> + * Initialize a new chain node and either start a new chain or add 
>> the node to
>> + * the existing chain of the previous fence.
>> + */
>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>> +              struct dma_fence *prev,
>> +              struct dma_fence *fence,
>> +              uint64_t seqno)
>> +{
>> +    struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
>> +    uint64_t context;
>> +
>> +    spin_lock_init(&chain->lock);
>> +    chain->prev = prev;
>> +    chain->fence = fence;
>> +    chain->prev_seqno = 0;
>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
>> +
>> +    /* Try to reuse the context of the previous chain node. */
>> +    if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
>> +        context = prev->context;
>> +        chain->prev_seqno = prev->seqno;
>> +    } else {
>> +        context = dma_fence_context_alloc(1);
>> +        /* Make sure that we always have a valid sequence number. */
>> +        if (prev_chain)
>> +            seqno = max(prev->seqno, seqno);
>> +    }
>> +
>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
>> +               &chain->lock, context, seqno);
>> +}
>> +EXPORT_SYMBOL(dma_fence_chain_init);
>> diff --git a/include/linux/dma-fence-chain.h 
>> b/include/linux/dma-fence-chain.h
>> new file mode 100644
>> index 000000000000..a5c2e8c6915c
>> --- /dev/null
>> +++ b/include/linux/dma-fence-chain.h
>> @@ -0,0 +1,81 @@
>> +/*
>> + * fence-chain: chain fences together in a timeline
>> + *
>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>> + * Authors:
>> + *    Christian König <christian.koenig@amd.com>
>> + *
>> + * This program is free software; you can redistribute it and/or 
>> modify it
>> + * under the terms of the GNU General Public License version 2 as 
>> published by
>> + * the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, 
>> but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of 
>> MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
>> License for
>> + * more details.
>> + */
>> +
>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
>> +#define __LINUX_DMA_FENCE_CHAIN_H
>> +
>> +#include <linux/dma-fence.h>
>> +#include <linux/irq_work.h>
>> +
>> +/**
>> + * struct dma_fence_chain - fence to represent a node of a fence chain
>> + * @base: fence base class
>> + * @lock: spinlock for fence handling
>> + * @prev: previous fence of the chain
>> + * @prev_seqno: original previous seqno before garbage collection
>> + * @fence: encapsulated fence
>> + * @cb: callback structure for signaling
>> + * @work: irq work item for signaling
>> + */
>> +struct dma_fence_chain {
>> +    struct dma_fence base;
>> +    spinlock_t lock;
>> +    struct dma_fence *prev;
>> +    u64 prev_seqno;
>> +    struct dma_fence *fence;
>> +    struct dma_fence_cb cb;
>> +    struct irq_work work;
>> +};
>> +
>> +extern const struct dma_fence_ops dma_fence_chain_ops;
>> +
>> +/**
>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
>> + * @fence: fence to cast to a dma_fence_chain
>> + *
>> + * Returns NULL if the fence is not a dma_fence_chain,
>> + * or the dma_fence_chain otherwise.
>> + */
>> +static inline struct dma_fence_chain *
>> +to_dma_fence_chain(struct dma_fence *fence)
>> +{
>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
>> +        return NULL;
>> +
>> +    return container_of(fence, struct dma_fence_chain, base);
>> +}
>> +
>> +/**
>> + * dma_fence_chain_for_each - iterate over all fences in chain
>> + * @iter: current fence
>> + * @head: starting point
>> + *
>> + * Iterate over all fences in the chain. We keep a reference to the 
>> current
>> + * fence while inside the loop which must be dropped when breaking out.
>> + */
>> +#define dma_fence_chain_for_each(iter, head)    \
>> +    for (iter = dma_fence_get(head); iter; \
>> +         iter = dma_fence_chain_walk(head))
>> +
>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t 
>> seqno);
>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>> +              struct dma_fence *prev,
>> +              struct dma_fence *fence,
>> +              uint64_t seqno);
>> +
>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]         ` <e170ceed-fdb7-8b4a-93d7-e565641390b3-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-15 15:52           ` Lionel Landwerlin via amd-gfx
       [not found]             ` <bbae2023-8dee-692e-9549-40779a202587-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  2019-02-15 16:49             ` Jason Ekstrand
  0 siblings, 2 replies; 42+ messages in thread
From: Lionel Landwerlin via amd-gfx @ 2019-02-15 15:52 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing),
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König

On 15/02/2019 14:32, Koenig, Christian wrote:
> Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
>> Hi Christian, David,
>>
>> For timeline semaphores we need points to be signaled in order.
>> I'm struggling to understand how this fence-chain implementation
>> preserves ordering of the seqnos.
>>
>> One scenario where I can see an issue happening is when you have a
>> timeline with points 1 & 2 and userspace submits for 2 different
>> engines :
>>      - first with let's say a blitter style engine on point 2
>>      - then a 3d style engine on point 1
> Yeah, and where exactly is the problem?
>
> Seqno 1 will signal when the 3d style engine finishes work.
>
> And seqno 2 will signal when both seqno 1 is signaled and the blitter
> style engine has finished its work.

That's not really how I understood the spec, but I might be wrong.

What makes me think 1 should be signaled as soon as 2 is signaled
(regardless of whether the fence attached to point 1 has been signaled)
is that the spec defines wait & signal operations in terms of the value
of the timeline.


-Lionel

>
>> Another scenario would be signaling a timeline with points 1 & 2 with
>> those points in reverse order in the submission array.
> That is actually illegal in the spec, but actually handled gracefully as
> well.
>
> E.g. when you add seqno 1 to the syncobj container it will only signal
> when 2 is signaled as well.




>
> Regards,
> Christian.
>
>> -Lionel
>>
>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>
>>> Lockless container implementation similar to a dma_fence_array, but with
>>> only two elements per node and automatic garbage collection.
>>>
>>> v2: properly document dma_fence_chain_for_each, add
>>> dma_fence_chain_find_seqno,
>>>       drop prev reference during garbage collection if it's not a
>>> chain fence.
>>> v3: use head and iterator for dma_fence_chain_for_each
>>> v4: fix reference count in dma_fence_chain_enable_signaling
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>    drivers/dma-buf/Makefile          |   3 +-
>>>    drivers/dma-buf/dma-fence-chain.c | 241 ++++++++++++++++++++++++++++++
>>>    include/linux/dma-fence-chain.h   |  81 ++++++++++
>>>    3 files changed, 324 insertions(+), 1 deletion(-)
>>>    create mode 100644 drivers/dma-buf/dma-fence-chain.c
>>>    create mode 100644 include/linux/dma-fence-chain.h
>>>
>>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>>> index 0913a6ccab5a..1f006e083eb9 100644
>>> --- a/drivers/dma-buf/Makefile
>>> +++ b/drivers/dma-buf/Makefile
>>> @@ -1,4 +1,5 @@
>>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
>>> seqno-fence.o
>>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
>>> +     reservation.o seqno-fence.o
>>>    obj-$(CONFIG_SYNC_FILE)        += sync_file.o
>>>    obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
>>>    obj-$(CONFIG_UDMABUF)        += udmabuf.o
>>> diff --git a/drivers/dma-buf/dma-fence-chain.c
>>> b/drivers/dma-buf/dma-fence-chain.c
>>> new file mode 100644
>>> index 000000000000..0c5e3c902fa0
>>> --- /dev/null
>>> +++ b/drivers/dma-buf/dma-fence-chain.c
>>> @@ -0,0 +1,241 @@
>>> +/*
>>> + * fence-chain: chain fences together in a timeline
>>> + *
>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>>> + * Authors:
>>> + *    Christian König <christian.koenig@amd.com>
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> modify it
>>> + * under the terms of the GNU General Public License version 2 as
>>> published by
>>> + * the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> but WITHOUT
>>> + * ANY WARRANTY; without even the implied warranty of
>>> MERCHANTABILITY or
>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>> License for
>>> + * more details.
>>> + */
>>> +
>>> +#include <linux/dma-fence-chain.h>
>>> +
>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
>>> +
>>> +/**
>>> + * dma_fence_chain_get_prev - use RCU to get a reference to the
>>> previous fence
>>> + * @chain: chain node to get the previous node from
>>> + *
>>> + * Use dma_fence_get_rcu_safe to get a reference to the previous
>>> fence of the
>>> + * chain node.
>>> + */
>>> +static struct dma_fence *dma_fence_chain_get_prev(struct
>>> dma_fence_chain *chain)
>>> +{
>>> +    struct dma_fence *prev;
>>> +
>>> +    rcu_read_lock();
>>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
>>> +    rcu_read_unlock();
>>> +    return prev;
>>> +}
>>> +
>>> +/**
>>> + * dma_fence_chain_walk - chain walking function
>>> + * @fence: current chain node
>>> + *
>>> + * Walk the chain to the next node. Returns the next fence or NULL
>>> if we are at
>>> + * the end of the chain. Garbage collects chain nodes which are already
>>> + * signaled.
>>> + */
>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>>> +{
>>> +    struct dma_fence_chain *chain, *prev_chain;
>>> +    struct dma_fence *prev, *replacement, *tmp;
>>> +
>>> +    chain = to_dma_fence_chain(fence);
>>> +    if (!chain) {
>>> +        dma_fence_put(fence);
>>> +        return NULL;
>>> +    }
>>> +
>>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
>>> +
>>> +        prev_chain = to_dma_fence_chain(prev);
>>> +        if (prev_chain) {
>>> +            if (!dma_fence_is_signaled(prev_chain->fence))
>>> +                break;
>>> +
>>> +            replacement = dma_fence_chain_get_prev(prev_chain);
>>> +        } else {
>>> +            if (!dma_fence_is_signaled(prev))
>>> +                break;
>>> +
>>> +            replacement = NULL;
>>> +        }
>>> +
>>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
>>> +        if (tmp == prev)
>>> +            dma_fence_put(tmp);
>>> +        else
>>> +            dma_fence_put(replacement);
>>> +        dma_fence_put(prev);
>>> +    }
>>> +
>>> +    dma_fence_put(fence);
>>> +    return prev;
>>> +}
>>> +EXPORT_SYMBOL(dma_fence_chain_walk);
>>> +
>>> +/**
>>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
>>> + * @pfence: pointer to the chain node where to start
>>> + * @seqno: the sequence number to search for
>>> + *
>>> + * Advance the fence pointer to the chain node which will signal
>>> this sequence
>>> + * number. If no sequence number is provided then this is a no-op.
>>> + *
>>> + * Returns EINVAL if the fence is not a chain node or the sequence
>>> number has
>>> + * not yet advanced far enough.
>>> + */
>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>>> seqno)
>>> +{
>>> +    struct dma_fence_chain *chain;
>>> +
>>> +    if (!seqno)
>>> +        return 0;
>>> +
>>> +    chain = to_dma_fence_chain(*pfence);
>>> +    if (!chain || chain->base.seqno < seqno)
>>> +        return -EINVAL;
>>> +
>>> +    dma_fence_chain_for_each(*pfence, &chain->base) {
>>> +        if ((*pfence)->context != chain->base.context ||
>>> +            to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>>> +            break;
>>> +    }
>>> +    dma_fence_put(&chain->base);
>>> +
>>> +    return 0;
>>> +}
>>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
>>> +
>>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence
>>> *fence)
>>> +{
>>> +        return "dma_fence_chain";
>>> +}
>>> +
>>> +static const char *dma_fence_chain_get_timeline_name(struct
>>> dma_fence *fence)
>>> +{
>>> +        return "unbound";
>>> +}
>>> +
>>> +static void dma_fence_chain_irq_work(struct irq_work *work)
>>> +{
>>> +    struct dma_fence_chain *chain;
>>> +
>>> +    chain = container_of(work, typeof(*chain), work);
>>> +
>>> +    /* Try to rearm the callback */
>>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
>>> +        /* Ok, we are done. No more unsignaled fences left */
>>> +        dma_fence_signal(&chain->base);
>>> +    dma_fence_put(&chain->base);
>>> +}
>>> +
>>> +static void dma_fence_chain_cb(struct dma_fence *f, struct
>>> dma_fence_cb *cb)
>>> +{
>>> +    struct dma_fence_chain *chain;
>>> +
>>> +    chain = container_of(cb, typeof(*chain), cb);
>>> +    irq_work_queue(&chain->work);
>>> +    dma_fence_put(f);
>>> +}
>>> +
>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>>> +{
>>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
>>> +
>>> +    dma_fence_get(&head->base);
>>> +    dma_fence_chain_for_each(fence, &head->base) {
>>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>> +        struct dma_fence *f = chain ? chain->fence : fence;
>>> +
>>> +        dma_fence_get(f);
>>> +        if (!dma_fence_add_callback(f, &head->cb,
>>> dma_fence_chain_cb)) {
>>> +            dma_fence_put(fence);
>>> +            return true;
>>> +        }
>>> +        dma_fence_put(f);
>>> +    }
>>> +    dma_fence_put(&head->base);
>>> +    return false;
>>> +}
>>> +
>>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
>>> +{
>>> +    dma_fence_chain_for_each(fence, fence) {
>>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>> +        struct dma_fence *f = chain ? chain->fence : fence;
>>> +
>>> +        if (!dma_fence_is_signaled(f)) {
>>> +            dma_fence_put(fence);
>>> +            return false;
>>> +        }
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +static void dma_fence_chain_release(struct dma_fence *fence)
>>> +{
>>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>> +
>>> +    dma_fence_put(chain->prev);
>>> +    dma_fence_put(chain->fence);
>>> +    dma_fence_free(fence);
>>> +}
>>> +
>>> +const struct dma_fence_ops dma_fence_chain_ops = {
>>> +    .get_driver_name = dma_fence_chain_get_driver_name,
>>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
>>> +    .enable_signaling = dma_fence_chain_enable_signaling,
>>> +    .signaled = dma_fence_chain_signaled,
>>> +    .release = dma_fence_chain_release,
>>> +};
>>> +EXPORT_SYMBOL(dma_fence_chain_ops);
>>> +
>>> +/**
>>> + * dma_fence_chain_init - initialize a fence chain
>>> + * @chain: the chain node to initialize
>>> + * @prev: the previous fence
>>> + * @fence: the current fence
>>> + *
>>> + * Initialize a new chain node and either start a new chain or add
>>> the node to
>>> + * the existing chain of the previous fence.
>>> + */
>>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>>> +              struct dma_fence *prev,
>>> +              struct dma_fence *fence,
>>> +              uint64_t seqno)
>>> +{
>>> +    struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
>>> +    uint64_t context;
>>> +
>>> +    spin_lock_init(&chain->lock);
>>> +    chain->prev = prev;
>>> +    chain->fence = fence;
>>> +    chain->prev_seqno = 0;
>>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
>>> +
>>> +    /* Try to reuse the context of the previous chain node. */
>>> +    if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
>>> +        context = prev->context;
>>> +        chain->prev_seqno = prev->seqno;
>>> +    } else {
>>> +        context = dma_fence_context_alloc(1);
>>> +        /* Make sure that we always have a valid sequence number. */
>>> +        if (prev_chain)
>>> +            seqno = max(prev->seqno, seqno);
>>> +    }
>>> +
>>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
>>> +               &chain->lock, context, seqno);
>>> +}
>>> +EXPORT_SYMBOL(dma_fence_chain_init);
>>> diff --git a/include/linux/dma-fence-chain.h
>>> b/include/linux/dma-fence-chain.h
>>> new file mode 100644
>>> index 000000000000..a5c2e8c6915c
>>> --- /dev/null
>>> +++ b/include/linux/dma-fence-chain.h
>>> @@ -0,0 +1,81 @@
>>> +/*
>>> + * fence-chain: chain fences together in a timeline
>>> + *
>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>>> + * Authors:
>>> + *    Christian König <christian.koenig@amd.com>
>>> + *
>>> + * This program is free software; you can redistribute it and/or
>>> modify it
>>> + * under the terms of the GNU General Public License version 2 as
>>> published by
>>> + * the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> but WITHOUT
>>> + * ANY WARRANTY; without even the implied warranty of
>>> MERCHANTABILITY or
>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>> License for
>>> + * more details.
>>> + */
>>> +
>>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
>>> +#define __LINUX_DMA_FENCE_CHAIN_H
>>> +
>>> +#include <linux/dma-fence.h>
>>> +#include <linux/irq_work.h>
>>> +
>>> +/**
>>> + * struct dma_fence_chain - fence to represent a node of a fence chain
>>> + * @base: fence base class
>>> + * @lock: spinlock for fence handling
>>> + * @prev: previous fence of the chain
>>> + * @prev_seqno: original previous seqno before garbage collection
>>> + * @fence: encapsulated fence
>>> + * @cb: callback structure for signaling
>>> + * @work: irq work item for signaling
>>> + */
>>> +struct dma_fence_chain {
>>> +    struct dma_fence base;
>>> +    spinlock_t lock;
>>> +    struct dma_fence *prev;
>>> +    u64 prev_seqno;
>>> +    struct dma_fence *fence;
>>> +    struct dma_fence_cb cb;
>>> +    struct irq_work work;
>>> +};
>>> +
>>> +extern const struct dma_fence_ops dma_fence_chain_ops;
>>> +
>>> +/**
>>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
>>> + * @fence: fence to cast to a dma_fence_chain
>>> + *
>>> + * Returns NULL if the fence is not a dma_fence_chain,
>>> + * or the dma_fence_chain otherwise.
>>> + */
>>> +static inline struct dma_fence_chain *
>>> +to_dma_fence_chain(struct dma_fence *fence)
>>> +{
>>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
>>> +        return NULL;
>>> +
>>> +    return container_of(fence, struct dma_fence_chain, base);
>>> +}
>>> +
>>> +/**
>>> + * dma_fence_chain_for_each - iterate over all fences in chain
>>> + * @iter: current fence
>>> + * @head: starting point
>>> + *
>>> + * Iterate over all fences in the chain. We keep a reference to the
>>> current
>>> + * fence while inside the loop which must be dropped when breaking out.
>>> + */
>>> +#define dma_fence_chain_for_each(iter, head)    \
>>> +    for (iter = dma_fence_get(head); iter; \
>>> +         iter = dma_fence_chain_walk(head))
>>> +
>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>>> seqno);
>>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>>> +              struct dma_fence *prev,
>>> +              struct dma_fence *fence,
>>> +              uint64_t seqno);
>>> +
>>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]             ` <bbae2023-8dee-692e-9549-40779a202587-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2019-02-15 16:39               ` Christian König via amd-gfx
  0 siblings, 0 replies; 42+ messages in thread
From: Christian König via amd-gfx @ 2019-02-15 16:39 UTC (permalink / raw)
  To: Lionel Landwerlin, Koenig, Christian, Zhou, David(ChunMing),
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König

Am 15.02.19 um 16:52 schrieb Lionel Landwerlin:
> On 15/02/2019 14:32, Koenig, Christian wrote:
>> Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
>>> Hi Christian, David,
>>>
>>> For timeline semaphore we need points to be signaled in order.
>>> I'm struggling to understand how this fence-chain implementation
>>> preserves ordering of the seqnos.
>>>
>>> One of the scenario I can see an issue happening is when you have a
>>> timeline with points 1 & 2 and userspace submits for 2 different
>>> engines :
>>>      - first with let's say a blitter style engine on point 2
>>>      - then a 3d style engine on point 1
>> Yeah, and where exactly is the problem?
>>
>> Seqno 1 will signal when the 3d style engine finishes work.
>>
>> And seqno 2 will signal when both seqno 1 is signaled and the blitter
>> style engine has finished its work.
>
> That's not really how I understood the spec, but I might be wrong.
>
> What makes me think 1 should be signaled as soon as 2 is signaled
> (regardless of whether the fence attached on point 1 has been signaled),
> is that the spec defines wait & signal operations in term of the value
> of the timeline.

That's what we had initially as well and it was rejected.

When 2 signals before 1 is signaled you can't call this a timeline any more.

Christian.

>
>
> -Lionel
>
>>
>>> Another scenario would be signaling a timeline with points 1 & 2 with
>>> those points in reverse order in the submission array.
>> That is actually illegal in the spec, but actually handled gracefully as
>> well.
>>
>> E.g. when you add seqno 1 to the syncobj container it will only signal
>> when 2 is signaled as well.
>
>
>
>
>>
>> Regards,
>> Christian.
>>
>>> -Lionel
>>>
>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>
>>>> Lockless container implementation similar to a dma_fence_array, but 
>>>> with
>>>> only two elements per node and automatic garbage collection.
>>>>
>>>> v2: properly document dma_fence_chain_for_each, add
>>>> dma_fence_chain_find_seqno,
>>>>       drop prev reference during garbage collection if it's not a
>>>> chain fence.
>>>> v3: use head and iterator for dma_fence_chain_for_each
>>>> v4: fix reference count in dma_fence_chain_enable_signaling
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>    drivers/dma-buf/Makefile          |   3 +-
>>>>    drivers/dma-buf/dma-fence-chain.c | 241 
>>>> ++++++++++++++++++++++++++++++
>>>>    include/linux/dma-fence-chain.h   |  81 ++++++++++
>>>>    3 files changed, 324 insertions(+), 1 deletion(-)
>>>>    create mode 100644 drivers/dma-buf/dma-fence-chain.c
>>>>    create mode 100644 include/linux/dma-fence-chain.h
>>>>
>>>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>>>> index 0913a6ccab5a..1f006e083eb9 100644
>>>> --- a/drivers/dma-buf/Makefile
>>>> +++ b/drivers/dma-buf/Makefile
>>>> @@ -1,4 +1,5 @@
>>>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
>>>> seqno-fence.o
>>>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
>>>> +     reservation.o seqno-fence.o
>>>>    obj-$(CONFIG_SYNC_FILE)        += sync_file.o
>>>>    obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
>>>>    obj-$(CONFIG_UDMABUF)        += udmabuf.o
>>>> diff --git a/drivers/dma-buf/dma-fence-chain.c
>>>> b/drivers/dma-buf/dma-fence-chain.c
>>>> new file mode 100644
>>>> index 000000000000..0c5e3c902fa0
>>>> --- /dev/null
>>>> +++ b/drivers/dma-buf/dma-fence-chain.c
>>>> @@ -0,0 +1,241 @@
>>>> +/*
>>>> + * fence-chain: chain fences together in a timeline
>>>> + *
>>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>>>> + * Authors:
>>>> + *    Christian König <christian.koenig@amd.com>
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or
>>>> modify it
>>>> + * under the terms of the GNU General Public License version 2 as
>>>> published by
>>>> + * the Free Software Foundation.
>>>> + *
>>>> + * This program is distributed in the hope that it will be useful,
>>>> but WITHOUT
>>>> + * ANY WARRANTY; without even the implied warranty of
>>>> MERCHANTABILITY or
>>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>> License for
>>>> + * more details.
>>>> + */
>>>> +
>>>> +#include <linux/dma-fence-chain.h>
>>>> +
>>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence 
>>>> *fence);
>>>> +
>>>> +/**
>>>> + * dma_fence_chain_get_prev - use RCU to get a reference to the
>>>> previous fence
>>>> + * @chain: chain node to get the previous node from
>>>> + *
>>>> + * Use dma_fence_get_rcu_safe to get a reference to the previous
>>>> fence of the
>>>> + * chain node.
>>>> + */
>>>> +static struct dma_fence *dma_fence_chain_get_prev(struct
>>>> dma_fence_chain *chain)
>>>> +{
>>>> +    struct dma_fence *prev;
>>>> +
>>>> +    rcu_read_lock();
>>>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
>>>> +    rcu_read_unlock();
>>>> +    return prev;
>>>> +}
>>>> +
>>>> +/**
>>>> + * dma_fence_chain_walk - chain walking function
>>>> + * @fence: current chain node
>>>> + *
>>>> + * Walk the chain to the next node. Returns the next fence or NULL
>>>> if we are at
>>>> + * the end of the chain. Garbage collects chain nodes which are 
>>>> already
>>>> + * signaled.
>>>> + */
>>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>>>> +{
>>>> +    struct dma_fence_chain *chain, *prev_chain;
>>>> +    struct dma_fence *prev, *replacement, *tmp;
>>>> +
>>>> +    chain = to_dma_fence_chain(fence);
>>>> +    if (!chain) {
>>>> +        dma_fence_put(fence);
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
>>>> +
>>>> +        prev_chain = to_dma_fence_chain(prev);
>>>> +        if (prev_chain) {
>>>> +            if (!dma_fence_is_signaled(prev_chain->fence))
>>>> +                break;
>>>> +
>>>> +            replacement = dma_fence_chain_get_prev(prev_chain);
>>>> +        } else {
>>>> +            if (!dma_fence_is_signaled(prev))
>>>> +                break;
>>>> +
>>>> +            replacement = NULL;
>>>> +        }
>>>> +
>>>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
>>>> +        if (tmp == prev)
>>>> +            dma_fence_put(tmp);
>>>> +        else
>>>> +            dma_fence_put(replacement);
>>>> +        dma_fence_put(prev);
>>>> +    }
>>>> +
>>>> +    dma_fence_put(fence);
>>>> +    return prev;
>>>> +}
>>>> +EXPORT_SYMBOL(dma_fence_chain_walk);
>>>> +
>>>> +/**
>>>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
>>>> + * @pfence: pointer to the chain node where to start
>>>> + * @seqno: the sequence number to search for
>>>> + *
>>>> + * Advance the fence pointer to the chain node which will signal
>>>> this sequence
>>>> + * number. If no sequence number is provided then this is a no-op.
>>>> + *
>>>> + * Returns EINVAL if the fence is not a chain node or the sequence
>>>> number has
>>>> + * not yet advanced far enough.
>>>> + */
>>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>>>> seqno)
>>>> +{
>>>> +    struct dma_fence_chain *chain;
>>>> +
>>>> +    if (!seqno)
>>>> +        return 0;
>>>> +
>>>> +    chain = to_dma_fence_chain(*pfence);
>>>> +    if (!chain || chain->base.seqno < seqno)
>>>> +        return -EINVAL;
>>>> +
>>>> +    dma_fence_chain_for_each(*pfence, &chain->base) {
>>>> +        if ((*pfence)->context != chain->base.context ||
>>>> +            to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>>>> +            break;
>>>> +    }
>>>> +    dma_fence_put(&chain->base);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
>>>> +
>>>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence
>>>> *fence)
>>>> +{
>>>> +        return "dma_fence_chain";
>>>> +}
>>>> +
>>>> +static const char *dma_fence_chain_get_timeline_name(struct
>>>> dma_fence *fence)
>>>> +{
>>>> +        return "unbound";
>>>> +}
>>>> +
>>>> +static void dma_fence_chain_irq_work(struct irq_work *work)
>>>> +{
>>>> +    struct dma_fence_chain *chain;
>>>> +
>>>> +    chain = container_of(work, typeof(*chain), work);
>>>> +
>>>> +    /* Try to rearm the callback */
>>>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
>>>> +        /* Ok, we are done. No more unsignaled fences left */
>>>> +        dma_fence_signal(&chain->base);
>>>> +    dma_fence_put(&chain->base);
>>>> +}
>>>> +
>>>> +static void dma_fence_chain_cb(struct dma_fence *f, struct
>>>> dma_fence_cb *cb)
>>>> +{
>>>> +    struct dma_fence_chain *chain;
>>>> +
>>>> +    chain = container_of(cb, typeof(*chain), cb);
>>>> +    irq_work_queue(&chain->work);
>>>> +    dma_fence_put(f);
>>>> +}
>>>> +
>>>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>>>> +{
>>>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
>>>> +
>>>> +    dma_fence_get(&head->base);
>>>> +    dma_fence_chain_for_each(fence, &head->base) {
>>>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>>> +        struct dma_fence *f = chain ? chain->fence : fence;
>>>> +
>>>> +        dma_fence_get(f);
>>>> +        if (!dma_fence_add_callback(f, &head->cb,
>>>> dma_fence_chain_cb)) {
>>>> +            dma_fence_put(fence);
>>>> +            return true;
>>>> +        }
>>>> +        dma_fence_put(f);
>>>> +    }
>>>> +    dma_fence_put(&head->base);
>>>> +    return false;
>>>> +}
>>>> +
>>>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
>>>> +{
>>>> +    dma_fence_chain_for_each(fence, fence) {
>>>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>>> +        struct dma_fence *f = chain ? chain->fence : fence;
>>>> +
>>>> +        if (!dma_fence_is_signaled(f)) {
>>>> +            dma_fence_put(fence);
>>>> +            return false;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return true;
>>>> +}
>>>> +
>>>> +static void dma_fence_chain_release(struct dma_fence *fence)
>>>> +{
>>>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>>>> +
>>>> +    dma_fence_put(chain->prev);
>>>> +    dma_fence_put(chain->fence);
>>>> +    dma_fence_free(fence);
>>>> +}
>>>> +
>>>> +const struct dma_fence_ops dma_fence_chain_ops = {
>>>> +    .get_driver_name = dma_fence_chain_get_driver_name,
>>>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
>>>> +    .enable_signaling = dma_fence_chain_enable_signaling,
>>>> +    .signaled = dma_fence_chain_signaled,
>>>> +    .release = dma_fence_chain_release,
>>>> +};
>>>> +EXPORT_SYMBOL(dma_fence_chain_ops);
>>>> +
>>>> +/**
>>>> + * dma_fence_chain_init - initialize a fence chain
>>>> + * @chain: the chain node to initialize
>>>> + * @prev: the previous fence
>>>> + * @fence: the current fence
>>>> + *
>>>> + * Initialize a new chain node and either start a new chain or add
>>>> the node to
>>>> + * the existing chain of the previous fence.
>>>> + */
>>>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>>>> +              struct dma_fence *prev,
>>>> +              struct dma_fence *fence,
>>>> +              uint64_t seqno)
>>>> +{
>>>> +    struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
>>>> +    uint64_t context;
>>>> +
>>>> +    spin_lock_init(&chain->lock);
>>>> +    chain->prev = prev;
>>>> +    chain->fence = fence;
>>>> +    chain->prev_seqno = 0;
>>>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
>>>> +
>>>> +    /* Try to reuse the context of the previous chain node. */
>>>> +    if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
>>>> +        context = prev->context;
>>>> +        chain->prev_seqno = prev->seqno;
>>>> +    } else {
>>>> +        context = dma_fence_context_alloc(1);
>>>> +        /* Make sure that we always have a valid sequence number. */
>>>> +        if (prev_chain)
>>>> +            seqno = max(prev->seqno, seqno);
>>>> +    }
>>>> +
>>>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
>>>> +               &chain->lock, context, seqno);
>>>> +}
>>>> +EXPORT_SYMBOL(dma_fence_chain_init);
>>>> diff --git a/include/linux/dma-fence-chain.h
>>>> b/include/linux/dma-fence-chain.h
>>>> new file mode 100644
>>>> index 000000000000..a5c2e8c6915c
>>>> --- /dev/null
>>>> +++ b/include/linux/dma-fence-chain.h
>>>> @@ -0,0 +1,81 @@
>>>> +/*
>>>> + * fence-chain: chain fences together in a timeline
>>>> + *
>>>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>>>> + * Authors:
>>>> + *    Christian König <christian.koenig@amd.com>
>>>> + *
>>>> + * This program is free software; you can redistribute it and/or
>>>> modify it
>>>> + * under the terms of the GNU General Public License version 2 as
>>>> published by
>>>> + * the Free Software Foundation.
>>>> + *
>>>> + * This program is distributed in the hope that it will be useful,
>>>> but WITHOUT
>>>> + * ANY WARRANTY; without even the implied warranty of
>>>> MERCHANTABILITY or
>>>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>>>> License for
>>>> + * more details.
>>>> + */
>>>> +
>>>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
>>>> +#define __LINUX_DMA_FENCE_CHAIN_H
>>>> +
>>>> +#include <linux/dma-fence.h>
>>>> +#include <linux/irq_work.h>
>>>> +
>>>> +/**
>>>> + * struct dma_fence_chain - fence to represent an node of a fence 
>>>> chain
>>>> + * @base: fence base class
>>>> + * @lock: spinlock for fence handling
>>>> + * @prev: previous fence of the chain
>>>> + * @prev_seqno: original previous seqno before garbage collection
>>>> + * @fence: encapsulated fence
>>>> + * @cb: callback structure for signaling
>>>> + * @work: irq work item for signaling
>>>> + */
>>>> +struct dma_fence_chain {
>>>> +    struct dma_fence base;
>>>> +    spinlock_t lock;
>>>> +    struct dma_fence *prev;
>>>> +    u64 prev_seqno;
>>>> +    struct dma_fence *fence;
>>>> +    struct dma_fence_cb cb;
>>>> +    struct irq_work work;
>>>> +};
>>>> +
>>>> +extern const struct dma_fence_ops dma_fence_chain_ops;
>>>> +
>>>> +/**
>>>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
>>>> + * @fence: fence to cast to a dma_fence_chain
>>>> + *
>>>> + * Returns NULL if the fence is not a dma_fence_chain,
>>>> + * or the dma_fence_chain otherwise.
>>>> + */
>>>> +static inline struct dma_fence_chain *
>>>> +to_dma_fence_chain(struct dma_fence *fence)
>>>> +{
>>>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
>>>> +        return NULL;
>>>> +
>>>> +    return container_of(fence, struct dma_fence_chain, base);
>>>> +}
>>>> +
>>>> +/**
>>>> + * dma_fence_chain_for_each - iterate over all fences in chain
>>>> + * @iter: current fence
>>>> + * @head: starting point
>>>> + *
>>>> + * Iterate over all fences in the chain. We keep a reference to the
>>>> current
>>>> + * fence while inside the loop which must be dropped when breaking 
>>>> out.
>>>> + */
>>>> +#define dma_fence_chain_for_each(iter, head)    \
>>>> +    for (iter = dma_fence_get(head); iter; \
>>>> +         iter = dma_fence_chain_walk(head))
>>>> +
>>>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
>>>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>>>> seqno);
>>>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>>>> +              struct dma_fence *prev,
>>>> +              struct dma_fence *fence,
>>>> +              uint64_t seqno);
>>>> +
>>>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
>>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
  2019-02-15 15:52           ` Lionel Landwerlin via amd-gfx
       [not found]             ` <bbae2023-8dee-692e-9549-40779a202587-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2019-02-15 16:49             ` Jason Ekstrand
       [not found]               ` <CAOFGe96HUkzHPJKYT-07X3vMvCRD-=Hba1=Ke24qt_PY2vn0YQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 42+ messages in thread
From: Jason Ekstrand @ 2019-02-15 16:49 UTC (permalink / raw)
  To: Lionel Landwerlin
  Cc: amd-gfx, Koenig, Christian, dri-devel, Christian König


[-- Attachment #1.1: Type: text/plain, Size: 19950 bytes --]

On Fri, Feb 15, 2019 at 9:52 AM Lionel Landwerlin via dri-devel <
dri-devel@lists.freedesktop.org> wrote:

> On 15/02/2019 14:32, Koenig, Christian wrote:
> > Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
> >> Hi Christian, David,
> >>
> >> For timeline semaphore we need points to be signaled in order.
> >> I'm struggling to understand how this fence-chain implementation
> >> preserves ordering of the seqnos.
> >>
> >> One of the scenario I can see an issue happening is when you have a
> >> timeline with points 1 & 2 and userspace submits for 2 different
> >> engines :
> >>      - first with let's say a blitter style engine on point 2
> >>      - then a 3d style engine on point 1
> > Yeah, and where exactly is the problem?
> >
> > Seqno 1 will signal when the 3d style engine finishes work.
> >
> > And seqno 2 will signal when both seqno 1 is signaled and the blitter
> > style engine has finished its work.
>

That's an interesting interpretation of the spec.  I think it's legal and I
could see that behavior may be desirable in some ways.


> That's not really how I understood the spec, but I might be wrong.
>
> What makes me think 1 should be signaled as soon as 2 is signaled
> (regardless of whether the fence attached on point 1 has been signaled),
> is that the spec defines wait & signal operations in term of the value
> of the timeline.
>
>
> -Lionel
>
> >
> >> Another scenario would be signaling a timeline with points 1 & 2 with
> >> those points in reverse order in the submission array.
> > That is actually illegal in the spec, but actually handled gracefully as
> > well.
> >
> > E.g. when you add seqno 1 to the syncobj container it will only signal
> > when 2 is signaled as well.
>

I think what Christian is suggesting is a valid interpretation of the spec
though it is rather unconventional.  The Vulkan spec, as it stands today,
requires that the application ensure that at the time of signaling, the
timeline semaphore value increases.  This means that all of the above
possible cases are technically illegal in Vulkan and so it doesn't really
matter what we do as long as we don't do anything especially stupid.

My understanding of how this works on Windows is that a wait operation on 3
is a wait until x >= 3 where x is a 64-bit value and a signal operation is
simply a write to x.  This means that, in the above cases, waits on 1 will
be triggered immediately when 2 is written but waits on 2 may or may not
happen at all depending on whether the GPU write which overwrites x to 1 or
the CPU (or potentially GPU in a different context) read gets there first
such that the reader observes 2.  If you mess this up and something isn't
signaled, that's your fault.

Instead of specifying things to be exactly the Windows behavior, Vulkan
says that you must only ever increase the value and anything else is
illegal and therefore leads to undefined behavior.  The usual consequences
of undefined behavior apply: anything can happen up to and including
process termination.  In other words, how we handle those cases is
completely up to us as long as we do something sane that doesn't result in
kernel crashes or anything like that.  We do have to handle it in some way
because we can't outright prevent those cases from happening.  The question
then becomes what's the best way for the behavior to degrade.

In my opinion, the smoothest degradation is if you take the Windows model
and replace the 64-bit write to x with a 64-bit atomic MAX operation.  In
other words, signaling 2 automatically unblocks 1 and any attempt to signal
a value lower than the current value is a no-op.  It has a few nice
advantages:

 1. Signaling N is guaranteed to unblock everything waiting on n <= N
regardless of what else may be pending.
 2. It matches what I think is the next natural evolution of the Windows
model where the write is replaced with an atomic.
 3. It gracefully handles the case where the operation to signal 1 is added
after the one to signal 2.  We can also make this case illegal but this
model extends to one in which it could be legal and well-defined.
 4. If you do get into a sticky situation, you can unblock an entire
timeline by using the CPU signal ioctl to set it to a high value.

Of all these reasons, I think 1 and 2 carry the most weight.  2, in
particular, is interesting if we one day want to implement the same
behavior with a simple 64-bit value like Windows does.  Imagine, for
instance, a scenario where the GPU is doing its own scheduling or command
buffers are submitted ahead of the signal operation being available and
told to just sit on the GPU until they see x >= 3.  (Yes, there are issues
here with residency, contention, etc.  I'm asking you to use your
imagination.)  Assuming you can do 64-bit atomics (there are apparently
issues here with PCIe that make things sticky), the behavior I'm suggesting
is completely implementable in that way whereas the behavior Christian is
suggesting is only implementable if you're maintaining a CPU-side list of
fences.  I don't think we want to paint ourselves into that corner.

--Jason


> >
> > Regards,
> > Christian.
> >
> >> -Lionel
> >>
> >> On 07/12/2018 09:55, Chunming Zhou wrote:
> >>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >>>
> >>> Lockless container implementation similar to a dma_fence_array, but
> with
> >>> only two elements per node and automatic garbage collection.
> >>>
> >>> v2: properly document dma_fence_chain_for_each, add
> >>> dma_fence_chain_find_seqno,
> >>>       drop prev reference during garbage collection if it's not a
> >>> chain fence.
> >>> v3: use head and iterator for dma_fence_chain_for_each
> >>> v4: fix reference count in dma_fence_chain_enable_signaling
> >>>
> >>> Signed-off-by: Christian König <christian.koenig@amd.com>
> >>> ---
> >>>    drivers/dma-buf/Makefile          |   3 +-
> >>>    drivers/dma-buf/dma-fence-chain.c | 241
> ++++++++++++++++++++++++++++++
> >>>    include/linux/dma-fence-chain.h   |  81 ++++++++++
> >>>    3 files changed, 324 insertions(+), 1 deletion(-)
> >>>    create mode 100644 drivers/dma-buf/dma-fence-chain.c
> >>>    create mode 100644 include/linux/dma-fence-chain.h
> >>>
> >>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> >>> index 0913a6ccab5a..1f006e083eb9 100644
> >>> --- a/drivers/dma-buf/Makefile
> >>> +++ b/drivers/dma-buf/Makefile
> >>> @@ -1,4 +1,5 @@
> >>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
> >>> seqno-fence.o
> >>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
> >>> +     reservation.o seqno-fence.o
> >>>    obj-$(CONFIG_SYNC_FILE)        += sync_file.o
> >>>    obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
> >>>    obj-$(CONFIG_UDMABUF)        += udmabuf.o
> >>> diff --git a/drivers/dma-buf/dma-fence-chain.c
> >>> b/drivers/dma-buf/dma-fence-chain.c
> >>> new file mode 100644
> >>> index 000000000000..0c5e3c902fa0
> >>> --- /dev/null
> >>> +++ b/drivers/dma-buf/dma-fence-chain.c
> >>> @@ -0,0 +1,241 @@
> >>> +/*
> >>> + * fence-chain: chain fences together in a timeline
> >>> + *
> >>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
> >>> + * Authors:
> >>> + *    Christian König <christian.koenig@amd.com>
> >>> + *
> >>> + * This program is free software; you can redistribute it and/or
> >>> modify it
> >>> + * under the terms of the GNU General Public License version 2 as
> >>> published by
> >>> + * the Free Software Foundation.
> >>> + *
> >>> + * This program is distributed in the hope that it will be useful,
> >>> but WITHOUT
> >>> + * ANY WARRANTY; without even the implied warranty of
> >>> MERCHANTABILITY or
> >>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> >>> License for
> >>> + * more details.
> >>> + */
> >>> +
> >>> +#include <linux/dma-fence-chain.h>
> >>> +
> >>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence);
> >>> +
> >>> +/**
> >>> + * dma_fence_chain_get_prev - use RCU to get a reference to the
> >>> previous fence
> >>> + * @chain: chain node to get the previous node from
> >>> + *
> >>> + * Use dma_fence_get_rcu_safe to get a reference to the previous
> >>> fence of the
> >>> + * chain node.
> >>> + */
> >>> +static struct dma_fence *dma_fence_chain_get_prev(struct
> >>> dma_fence_chain *chain)
> >>> +{
> >>> +    struct dma_fence *prev;
> >>> +
> >>> +    rcu_read_lock();
> >>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
> >>> +    rcu_read_unlock();
> >>> +    return prev;
> >>> +}
> >>> +
> >>> +/**
> >>> + * dma_fence_chain_walk - chain walking function
> >>> + * @fence: current chain node
> >>> + *
> >>> + * Walk the chain to the next node. Returns the next fence or NULL
> >>> if we are at
> >>> + * the end of the chain. Garbage collects chain nodes which are
> already
> >>> + * signaled.
> >>> + */
> >>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
> >>> +{
> >>> +    struct dma_fence_chain *chain, *prev_chain;
> >>> +    struct dma_fence *prev, *replacement, *tmp;
> >>> +
> >>> +    chain = to_dma_fence_chain(fence);
> >>> +    if (!chain) {
> >>> +        dma_fence_put(fence);
> >>> +        return NULL;
> >>> +    }
> >>> +
> >>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
> >>> +
> >>> +        prev_chain = to_dma_fence_chain(prev);
> >>> +        if (prev_chain) {
> >>> +            if (!dma_fence_is_signaled(prev_chain->fence))
> >>> +                break;
> >>> +
> >>> +            replacement = dma_fence_chain_get_prev(prev_chain);
> >>> +        } else {
> >>> +            if (!dma_fence_is_signaled(prev))
> >>> +                break;
> >>> +
> >>> +            replacement = NULL;
> >>> +        }
> >>> +
> >>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
> >>> +        if (tmp == prev)
> >>> +            dma_fence_put(tmp);
> >>> +        else
> >>> +            dma_fence_put(replacement);
> >>> +        dma_fence_put(prev);
> >>> +    }
> >>> +
> >>> +    dma_fence_put(fence);
> >>> +    return prev;
> >>> +}
> >>> +EXPORT_SYMBOL(dma_fence_chain_walk);
> >>> +
> >>> +/**
> >>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
> >>> + * @pfence: pointer to the chain node where to start
> >>> + * @seqno: the sequence number to search for
> >>> + *
> >>> + * Advance the fence pointer to the chain node which will signal
> >>> this sequence
> >>> + * number. If no sequence number is provided then this is a no-op.
> >>> + *
> >>> + * Returns EINVAL if the fence is not a chain node or the sequence
> >>> number has
> >>> + * not yet advanced far enough.
> >>> + */
> >>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
> >>> seqno)
> >>> +{
> >>> +    struct dma_fence_chain *chain;
> >>> +
> >>> +    if (!seqno)
> >>> +        return 0;
> >>> +
> >>> +    chain = to_dma_fence_chain(*pfence);
> >>> +    if (!chain || chain->base.seqno < seqno)
> >>> +        return -EINVAL;
> >>> +
> >>> +    dma_fence_chain_for_each(*pfence, &chain->base) {
> >>> +        if ((*pfence)->context != chain->base.context ||
> >>> +            to_dma_fence_chain(*pfence)->prev_seqno < seqno)
> >>> +            break;
> >>> +    }
> >>> +    dma_fence_put(&chain->base);
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
> >>> +
> >>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence
> >>> *fence)
> >>> +{
> >>> +        return "dma_fence_chain";
> >>> +}
> >>> +
> >>> +static const char *dma_fence_chain_get_timeline_name(struct
> >>> dma_fence *fence)
> >>> +{
> >>> +        return "unbound";
> >>> +}
> >>> +
> >>> +static void dma_fence_chain_irq_work(struct irq_work *work)
> >>> +{
> >>> +    struct dma_fence_chain *chain;
> >>> +
> >>> +    chain = container_of(work, typeof(*chain), work);
> >>> +
> >>> +    /* Try to rearm the callback */
> >>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
> >>> +        /* Ok, we are done. No more unsignaled fences left */
> >>> +        dma_fence_signal(&chain->base);
> >>> +    dma_fence_put(&chain->base);
> >>> +}
> >>> +
> >>> +static void dma_fence_chain_cb(struct dma_fence *f, struct
> >>> dma_fence_cb *cb)
> >>> +{
> >>> +    struct dma_fence_chain *chain;
> >>> +
> >>> +    chain = container_of(cb, typeof(*chain), cb);
> >>> +    irq_work_queue(&chain->work);
> >>> +    dma_fence_put(f);
> >>> +}
> >>> +
> >>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
> >>> +{
> >>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
> >>> +
> >>> +    dma_fence_get(&head->base);
> >>> +    dma_fence_chain_for_each(fence, &head->base) {
> >>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
> >>> +        struct dma_fence *f = chain ? chain->fence : fence;
> >>> +
> >>> +        dma_fence_get(f);
> >>> +        if (!dma_fence_add_callback(f, &head->cb,
> >>> dma_fence_chain_cb)) {
> >>> +            dma_fence_put(fence);
> >>> +            return true;
> >>> +        }
> >>> +        dma_fence_put(f);
> >>> +    }
> >>> +    dma_fence_put(&head->base);
> >>> +    return false;
> >>> +}
> >>> +
> >>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
> >>> +{
> >>> +    dma_fence_chain_for_each(fence, fence) {
> >>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
> >>> +        struct dma_fence *f = chain ? chain->fence : fence;
> >>> +
> >>> +        if (!dma_fence_is_signaled(f)) {
> >>> +            dma_fence_put(fence);
> >>> +            return false;
> >>> +        }
> >>> +    }
> >>> +
> >>> +    return true;
> >>> +}
> >>> +
> >>> +static void dma_fence_chain_release(struct dma_fence *fence)
> >>> +{
> >>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
> >>> +
> >>> +    dma_fence_put(chain->prev);
> >>> +    dma_fence_put(chain->fence);
> >>> +    dma_fence_free(fence);
> >>> +}
> >>> +
> >>> +const struct dma_fence_ops dma_fence_chain_ops = {
> >>> +    .get_driver_name = dma_fence_chain_get_driver_name,
> >>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
> >>> +    .enable_signaling = dma_fence_chain_enable_signaling,
> >>> +    .signaled = dma_fence_chain_signaled,
> >>> +    .release = dma_fence_chain_release,
> >>> +};
> >>> +EXPORT_SYMBOL(dma_fence_chain_ops);
> >>> +
> >>> +/**
> >>> + * dma_fence_chain_init - initialize a fence chain
> >>> + * @chain: the chain node to initialize
> >>> + * @prev: the previous fence
> >>> + * @fence: the current fence
> >>> + *
> >>> + * Initialize a new chain node and either start a new chain or add
> >>> the node to
> >>> + * the existing chain of the previous fence.
> >>> + */
> >>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
> >>> +              struct dma_fence *prev,
> >>> +              struct dma_fence *fence,
> >>> +              uint64_t seqno)
> >>> +{
> >>> +    struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
> >>> +    uint64_t context;
> >>> +
> >>> +    spin_lock_init(&chain->lock);
> >>> +    chain->prev = prev;
> >>> +    chain->fence = fence;
> >>> +    chain->prev_seqno = 0;
> >>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
> >>> +
> >>> +    /* Try to reuse the context of the previous chain node. */
> >>> +    if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
> >>> +        context = prev->context;
> >>> +        chain->prev_seqno = prev->seqno;
> >>> +    } else {
> >>> +        context = dma_fence_context_alloc(1);
> >>> +        /* Make sure that we always have a valid sequence number. */
> >>> +        if (prev_chain)
> >>> +            seqno = max(prev->seqno, seqno);
> >>> +    }
> >>> +
> >>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
> >>> +               &chain->lock, context, seqno);
> >>> +}
> >>> +EXPORT_SYMBOL(dma_fence_chain_init);
> >>> diff --git a/include/linux/dma-fence-chain.h
> >>> b/include/linux/dma-fence-chain.h
> >>> new file mode 100644
> >>> index 000000000000..a5c2e8c6915c
> >>> --- /dev/null
> >>> +++ b/include/linux/dma-fence-chain.h
> >>> @@ -0,0 +1,81 @@
> >>> +/*
> >>> + * fence-chain: chain fences together in a timeline
> >>> + *
> >>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
> >>> + * Authors:
> >>> + *    Christian König <christian.koenig@amd.com>
> >>> + *
> >>> + * This program is free software; you can redistribute it and/or
> >>> modify it
> >>> + * under the terms of the GNU General Public License version 2 as
> >>> published by
> >>> + * the Free Software Foundation.
> >>> + *
> >>> + * This program is distributed in the hope that it will be useful,
> >>> but WITHOUT
> >>> + * ANY WARRANTY; without even the implied warranty of
> >>> MERCHANTABILITY or
> >>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> >>> License for
> >>> + * more details.
> >>> + */
> >>> +
> >>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
> >>> +#define __LINUX_DMA_FENCE_CHAIN_H
> >>> +
> >>> +#include <linux/dma-fence.h>
> >>> +#include <linux/irq_work.h>
> >>> +
> >>> +/**
> >>> + * struct dma_fence_chain - fence to represent an node of a fence
> chain
> >>> + * @base: fence base class
> >>> + * @lock: spinlock for fence handling
> >>> + * @prev: previous fence of the chain
> >>> + * @prev_seqno: original previous seqno before garbage collection
> >>> + * @fence: encapsulated fence
> >>> + * @cb: callback structure for signaling
> >>> + * @work: irq work item for signaling
> >>> + */
> >>> +struct dma_fence_chain {
> >>> +    struct dma_fence base;
> >>> +    spinlock_t lock;
> >>> +    struct dma_fence *prev;
> >>> +    u64 prev_seqno;
> >>> +    struct dma_fence *fence;
> >>> +    struct dma_fence_cb cb;
> >>> +    struct irq_work work;
> >>> +};
> >>> +
> >>> +extern const struct dma_fence_ops dma_fence_chain_ops;
> >>> +
> >>> +/**
> >>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
> >>> + * @fence: fence to cast to a dma_fence_array
> >>> + *
> >>> + * Returns NULL if the fence is not a dma_fence_chain,
> >>> + * or the dma_fence_chain otherwise.
> >>> + */
> >>> +static inline struct dma_fence_chain *
> >>> +to_dma_fence_chain(struct dma_fence *fence)
> >>> +{
> >>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
> >>> +        return NULL;
> >>> +
> >>> +    return container_of(fence, struct dma_fence_chain, base);
> >>> +}
> >>> +
> >>> +/**
> >>> + * dma_fence_chain_for_each - iterate over all fences in chain
> >>> + * @iter: current fence
> >>> + * @head: starting point
> >>> + *
> >>> + * Iterate over all fences in the chain. We keep a reference to the
> >>> current
> >>> + * fence while inside the loop which must be dropped when breaking
> out.
> >>> + */
> >>> +#define dma_fence_chain_for_each(iter, head)    \
> >>> +    for (iter = dma_fence_get(head); iter; \
> >>> +         iter = dma_fence_chain_walk(head))
> >>> +
> >>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
> >>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
> >>> seqno);
> >>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
> >>> +              struct dma_fence *prev,
> >>> +              struct dma_fence *fence,
> >>> +              uint64_t seqno);
> >>> +
> >>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
> >>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

[-- Attachment #1.2: Type: text/html, Size: 26854 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]               ` <CAOFGe96HUkzHPJKYT-07X3vMvCRD-=Hba1=Ke24qt_PY2vn0YQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-02-15 17:51                 ` Christian König via amd-gfx
       [not found]                   ` <a0b27d87-50f2-56ce-1db7-5a1dc005a798-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Christian König via amd-gfx @ 2019-02-15 17:51 UTC (permalink / raw)
  To: Jason Ekstrand, Lionel Landwerlin
  Cc: Christian König, Zhou, David(ChunMing),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Koenig, Christian,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 22324 bytes --]

Am 15.02.19 um 17:49 schrieb Jason Ekstrand:
> On Fri, Feb 15, 2019 at 9:52 AM Lionel Landwerlin via dri-devel 
> <dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org 
> <mailto:dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>> wrote:
>
>     On 15/02/2019 14:32, Koenig, Christian wrote:
>     > Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
>     >> Hi Christian, David,
>     >>
>     >> For timeline semaphore we need points to signaled in order.
>     >> I'm struggling to understand how this fence-chain implementation
>     >> preserves ordering of the seqnos.
>     >>
>     >> One of the scenario I can see an issue happening is when you have a
>     >> timeline with points 1 & 2 and userspace submits for 2 different
>     >> engines :
>     >>      - first with let's say a blitter style engine on point 2
>     >>      - then a 3d style engine on point 1
>     > Yeah, and where exactly is the problem?
>     >
>     > Seqno 1 will signal when the 3d style engine finishes work.
>     >
>     > And seqno 2 will signal when both seqno 1 is signaled and the
>     blitter
>     > style engine has finished its work.
>
>
> That's an interesting interpretation of the spec.  I think it's legal 
> and I could see that behavior may be desirable in some ways.

Well we actually had this discussion multiple times now, both internally 
as well as on the mailing list. Please also see the previous mails with 
Daniel on this topic.

My initial suggestion was actually to do exactly what Lionel suggested as well.

And following this I used a rather simple container for the 
implementation, e.g. just a ring buffer indexed by the sequence number. 
In this scenario userspace can specify on syncobj creation time how big 
the window for sequence numbers should be, e.g. in this implementation 
how big the ring buffer would be.

This was rejected by our guys who actually wrote a good part of the 
Vulkan specification. Daniel then went in the same direction 
during the public discussion.

[SNIP]
> I think what Christian is suggesting is a valid interpretation of the 
> spec though it is rather unconventional.  The Vulkan spec, as it 
> stands today, requires that the application ensure that at the time of 
> signaling, the timeline semaphore value increases.  This means that 
> all of the above possible cases are technically illegal in Vulkan and 
> so it doesn't really matter what we do as long as we don't do anyting 
> especially stupid.

And exactly that's the point. When an application does something stupid 
with its own submissions then this is not much of a problem.

But this interface is meant to be made for communication between 
processes, and here we want to be sure that nobody can do anything stupid.

> My understanding of how this works on Windows is that a wait operation 
> on 3 is a wait until x >= 3 where x is a 64-bit value and a signal 
> operation is simply a write to x. This means that, in the above cases, 
> waits on 1 will be triggered immediately when 2 is written but waits 
> on 2 may or may not happen at all depending on whether the GPU write 
> which overwrites x to 1 or the CPU (or potentially GPU in a different 
> context) read gets there first such that the reader observes 2.  If 
> you mess this up and something isn't signaled, that's your fault.

Yeah and I think that this is actually not a good idea at all. 
Implementing it like this ultimately means that you can only use polling 
on the number.

>  4. If you do get into a sticky situation, you can unblock an entire 
> timeline by using the CPU signal ioctl to set it to a high value.

Well I think that this could be problematic as well. Keep in mind that 
the main use case for this is sharing timelines between processes.

In other words you don't want applications to be able to mess with it too 
much.

>
> Of all these reasons, I think 1 and 2 carry the most weight.  2, in 
> particular, is interesting if we one day want to implement the same 
> behavior with a simple 64-bit value like Windows does.  Immagine, for 
> instance, a scenario where the GPU is doing it's own scheduling or 
> command buffers are submitted ahead of the signal operation being 
> available and told to just sit on the GPU until they see x >= 3.  
> (Yes, there are issues here with residency, contention, etc.  I'm 
> asking you to use your immagination.)  Assuming you can do 64-bit 
> atomics (there are aparently issues here with PCIe that make things 
> sticky), the behavior I'm suggesting is completely implementable in 
> that way whereas the behavior Christian is suggesting is only 
> implementable if you're maintaining a CPU-side list of fences.  I 
> don't think we want to paint ourselves into that corner.

Actually we already had such an implementation with radeon. And I can 
only say that it was a total PAIN IN THE A* to maintain.

This is one of the reasons why we are not using hardware semaphores 
anymore with amdgpu.

Regards,
Christian.

>
> --Jason
>
>     >
>     > Regards,
>     > Christian.
>     >
>     >> -Lionel
>     >>
>     >> On 07/12/2018 09:55, Chunming Zhou wrote:
>     >>> From: Christian König <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org
>     <mailto:ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>>
>     >>>
>     >>> Lockless container implementation similar to a
>     dma_fence_array, but with
>     >>> only two elements per node and automatic garbage collection.
>     >>>
>     >>> v2: properly document dma_fence_chain_for_each, add
>     >>> dma_fence_chain_find_seqno,
>     >>>       drop prev reference during garbage collection if it's not a
>     >>> chain fence.
>     >>> v3: use head and iterator for dma_fence_chain_for_each
>     >>> v4: fix reference count in dma_fence_chain_enable_signaling
>     >>>
>     >>> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org
>     <mailto:christian.koenig-5C7GfCeVMHo@public.gmane.org>>
>     >>> ---
>     >>>    drivers/dma-buf/Makefile          |   3 +-
>     >>>    drivers/dma-buf/dma-fence-chain.c | 241
>     ++++++++++++++++++++++++++++++
>     >>>    include/linux/dma-fence-chain.h   |  81 ++++++++++
>     >>>    3 files changed, 324 insertions(+), 1 deletion(-)
>     >>>    create mode 100644 drivers/dma-buf/dma-fence-chain.c
>     >>>    create mode 100644 include/linux/dma-fence-chain.h
>     >>>
>     >>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>     >>> index 0913a6ccab5a..1f006e083eb9 100644
>     >>> --- a/drivers/dma-buf/Makefile
>     >>> +++ b/drivers/dma-buf/Makefile
>     >>> @@ -1,4 +1,5 @@
>     >>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
>     >>> seqno-fence.o
>     >>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o
>     dma-fence-chain.o \
>     >>> +     reservation.o seqno-fence.o
>     >>>    obj-$(CONFIG_SYNC_FILE)        += sync_file.o
>     >>>    obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
>     >>>    obj-$(CONFIG_UDMABUF)        += udmabuf.o
>     >>> diff --git a/drivers/dma-buf/dma-fence-chain.c
>     >>> b/drivers/dma-buf/dma-fence-chain.c
>     >>> new file mode 100644
>     >>> index 000000000000..0c5e3c902fa0
>     >>> --- /dev/null
>     >>> +++ b/drivers/dma-buf/dma-fence-chain.c
>     >>> @@ -0,0 +1,241 @@
>     >>> +/*
>     >>> + * fence-chain: chain fences together in a timeline
>     >>> + *
>     >>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>     >>> + * Authors:
>     >>> + *    Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org
>     <mailto:christian.koenig-5C7GfCeVMHo@public.gmane.org>>
>     >>> + *
>     >>> + * This program is free software; you can redistribute it and/or
>     >>> modify it
>     >>> + * under the terms of the GNU General Public License version 2 as
>     >>> published by
>     >>> + * the Free Software Foundation.
>     >>> + *
>     >>> + * This program is distributed in the hope that it will be
>     useful,
>     >>> but WITHOUT
>     >>> + * ANY WARRANTY; without even the implied warranty of
>     >>> MERCHANTABILITY or
>     >>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>     >>> License for
>     >>> + * more details.
>     >>> + */
>     >>> +
>     >>> +#include <linux/dma-fence-chain.h>
>     >>> +
>     >>> +static bool dma_fence_chain_enable_signaling(struct dma_fence
>     *fence);
>     >>> +
>     >>> +/**
>     >>> + * dma_fence_chain_get_prev - use RCU to get a reference to the
>     >>> previous fence
>     >>> + * @chain: chain node to get the previous node from
>     >>> + *
>     >>> + * Use dma_fence_get_rcu_safe to get a reference to the previous
>     >>> fence of the
>     >>> + * chain node.
>     >>> + */
>     >>> +static struct dma_fence *dma_fence_chain_get_prev(struct
>     >>> dma_fence_chain *chain)
>     >>> +{
>     >>> +    struct dma_fence *prev;
>     >>> +
>     >>> +    rcu_read_lock();
>     >>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
>     >>> +    rcu_read_unlock();
>     >>> +    return prev;
>     >>> +}
>     >>> +
>     >>> +/**
>     >>> + * dma_fence_chain_walk - chain walking function
>     >>> + * @fence: current chain node
>     >>> + *
>     >>> + * Walk the chain to the next node. Returns the next fence or
>     NULL
>     >>> if we are at
>     >>> + * the end of the chain. Garbage collects chain nodes which
>     are already
>     >>> + * signaled.
>     >>> + */
>     >>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>     >>> +{
>     >>> +    struct dma_fence_chain *chain, *prev_chain;
>     >>> +    struct dma_fence *prev, *replacement, *tmp;
>     >>> +
>     >>> +    chain = to_dma_fence_chain(fence);
>     >>> +    if (!chain) {
>     >>> +        dma_fence_put(fence);
>     >>> +        return NULL;
>     >>> +    }
>     >>> +
>     >>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
>     >>> +
>     >>> +        prev_chain = to_dma_fence_chain(prev);
>     >>> +        if (prev_chain) {
>     >>> +            if (!dma_fence_is_signaled(prev_chain->fence))
>     >>> +                break;
>     >>> +
>     >>> +            replacement = dma_fence_chain_get_prev(prev_chain);
>     >>> +        } else {
>     >>> +            if (!dma_fence_is_signaled(prev))
>     >>> +                break;
>     >>> +
>     >>> +            replacement = NULL;
>     >>> +        }
>     >>> +
>     >>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
>     >>> +        if (tmp == prev)
>     >>> +            dma_fence_put(tmp);
>     >>> +        else
>     >>> +            dma_fence_put(replacement);
>     >>> +        dma_fence_put(prev);
>     >>> +    }
>     >>> +
>     >>> +    dma_fence_put(fence);
>     >>> +    return prev;
>     >>> +}
>     >>> +EXPORT_SYMBOL(dma_fence_chain_walk);
>     >>> +
>     >>> +/**
>     >>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
>     >>> + * @pfence: pointer to the chain node where to start
>     >>> + * @seqno: the sequence number to search for
>     >>> + *
>     >>> + * Advance the fence pointer to the chain node which will signal
>     >>> this sequence
>     >>> + * number. If no sequence number is provided then this is a
>     no-op.
>     >>> + *
>     >>> + * Returns EINVAL if the fence is not a chain node or the
>     sequence
>     >>> number has
>     >>> + * not yet advanced far enough.
>     >>> + */
>     >>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence,
>     uint64_t
>     >>> seqno)
>     >>> +{
>     >>> +    struct dma_fence_chain *chain;
>     >>> +
>     >>> +    if (!seqno)
>     >>> +        return 0;
>     >>> +
>     >>> +    chain = to_dma_fence_chain(*pfence);
>     >>> +    if (!chain || chain->base.seqno < seqno)
>     >>> +        return -EINVAL;
>     >>> +
>     >>> +    dma_fence_chain_for_each(*pfence, &chain->base) {
>     >>> +        if ((*pfence)->context != chain->base.context ||
>     >>> + to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>     >>> +            break;
>     >>> +    }
>     >>> +    dma_fence_put(&chain->base);
>     >>> +
>     >>> +    return 0;
>     >>> +}
>     >>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
>     >>> +
>     >>> +static const char *dma_fence_chain_get_driver_name(struct
>     dma_fence
>     >>> *fence)
>     >>> +{
>     >>> +        return "dma_fence_chain";
>     >>> +}
>     >>> +
>     >>> +static const char *dma_fence_chain_get_timeline_name(struct
>     >>> dma_fence *fence)
>     >>> +{
>     >>> +        return "unbound";
>     >>> +}
>     >>> +
>     >>> +static void dma_fence_chain_irq_work(struct irq_work *work)
>     >>> +{
>     >>> +    struct dma_fence_chain *chain;
>     >>> +
>     >>> +    chain = container_of(work, typeof(*chain), work);
>     >>> +
>     >>> +    /* Try to rearm the callback */
>     >>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
>     >>> +        /* Ok, we are done. No more unsignaled fences left */
>     >>> +        dma_fence_signal(&chain->base);
>     >>> +    dma_fence_put(&chain->base);
>     >>> +}
>     >>> +
>     >>> +static void dma_fence_chain_cb(struct dma_fence *f, struct
>     >>> dma_fence_cb *cb)
>     >>> +{
>     >>> +    struct dma_fence_chain *chain;
>     >>> +
>     >>> +    chain = container_of(cb, typeof(*chain), cb);
>     >>> +    irq_work_queue(&chain->work);
>     >>> +    dma_fence_put(f);
>     >>> +}
>     >>> +
>     >>> +static bool dma_fence_chain_enable_signaling(struct dma_fence
>     *fence)
>     >>> +{
>     >>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
>     >>> +
>     >>> +    dma_fence_get(&head->base);
>     >>> +    dma_fence_chain_for_each(fence, &head->base) {
>     >>> +        struct dma_fence_chain *chain =
>     to_dma_fence_chain(fence);
>     >>> +        struct dma_fence *f = chain ? chain->fence : fence;
>     >>> +
>     >>> +        dma_fence_get(f);
>     >>> +        if (!dma_fence_add_callback(f, &head->cb,
>     >>> dma_fence_chain_cb)) {
>     >>> +            dma_fence_put(fence);
>     >>> +            return true;
>     >>> +        }
>     >>> +        dma_fence_put(f);
>     >>> +    }
>     >>> +    dma_fence_put(&head->base);
>     >>> +    return false;
>     >>> +}
>     >>> +
>     >>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
>     >>> +{
>     >>> +    dma_fence_chain_for_each(fence, fence) {
>     >>> +        struct dma_fence_chain *chain =
>     to_dma_fence_chain(fence);
>     >>> +        struct dma_fence *f = chain ? chain->fence : fence;
>     >>> +
>     >>> +        if (!dma_fence_is_signaled(f)) {
>     >>> +            dma_fence_put(fence);
>     >>> +            return false;
>     >>> +        }
>     >>> +    }
>     >>> +
>     >>> +    return true;
>     >>> +}
>     >>> +
>     >>> +static void dma_fence_chain_release(struct dma_fence *fence)
>     >>> +{
>     >>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>     >>> +
>     >>> +    dma_fence_put(chain->prev);
>     >>> +    dma_fence_put(chain->fence);
>     >>> +    dma_fence_free(fence);
>     >>> +}
>     >>> +
>     >>> +const struct dma_fence_ops dma_fence_chain_ops = {
>     >>> +    .get_driver_name = dma_fence_chain_get_driver_name,
>     >>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
>     >>> +    .enable_signaling = dma_fence_chain_enable_signaling,
>     >>> +    .signaled = dma_fence_chain_signaled,
>     >>> +    .release = dma_fence_chain_release,
>     >>> +};
>     >>> +EXPORT_SYMBOL(dma_fence_chain_ops);
>     >>> +
>     >>> +/**
>     >>> + * dma_fence_chain_init - initialize a fence chain
>     >>> + * @chain: the chain node to initialize
>     >>> + * @prev: the previous fence
>     >>> + * @fence: the current fence
>     >>> + *
>     >>> + * Initialize a new chain node and either start a new chain
>     or add
>     >>> the node to
>     >>> + * the existing chain of the previous fence.
>     >>> + */
>     >>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>     >>> +              struct dma_fence *prev,
>     >>> +              struct dma_fence *fence,
>     >>> +              uint64_t seqno)
>     >>> +{
>     >>> +    struct dma_fence_chain *prev_chain =
>     to_dma_fence_chain(prev);
>     >>> +    uint64_t context;
>     >>> +
>     >>> +    spin_lock_init(&chain->lock);
>     >>> +    chain->prev = prev;
>     >>> +    chain->fence = fence;
>     >>> +    chain->prev_seqno = 0;
>     >>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
>     >>> +
>     >>> +    /* Try to reuse the context of the previous chain node. */
>     >>> +    if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
>     >>> +        context = prev->context;
>     >>> +        chain->prev_seqno = prev->seqno;
>     >>> +    } else {
>     >>> +        context = dma_fence_context_alloc(1);
>     >>> +        /* Make sure that we always have a valid sequence
>     number. */
>     >>> +        if (prev_chain)
>     >>> +            seqno = max(prev->seqno, seqno);
>     >>> +    }
>     >>> +
>     >>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
>     >>> +               &chain->lock, context, seqno);
>     >>> +}
>     >>> +EXPORT_SYMBOL(dma_fence_chain_init);
>     >>> diff --git a/include/linux/dma-fence-chain.h
>     >>> b/include/linux/dma-fence-chain.h
>     >>> new file mode 100644
>     >>> index 000000000000..a5c2e8c6915c
>     >>> --- /dev/null
>     >>> +++ b/include/linux/dma-fence-chain.h
>     >>> @@ -0,0 +1,81 @@
>     >>> +/*
>     >>> + * fence-chain: chain fences together in a timeline
>     >>> + *
>     >>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>     >>> + * Authors:
>     >>> + *    Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org
>     <mailto:christian.koenig-5C7GfCeVMHo@public.gmane.org>>
>     >>> + *
>     >>> + * This program is free software; you can redistribute it and/or
>     >>> modify it
>     >>> + * under the terms of the GNU General Public License version 2 as
>     >>> published by
>     >>> + * the Free Software Foundation.
>     >>> + *
>     >>> + * This program is distributed in the hope that it will be
>     useful,
>     >>> but WITHOUT
>     >>> + * ANY WARRANTY; without even the implied warranty of
>     >>> MERCHANTABILITY or
>     >>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>     >>> License for
>     >>> + * more details.
>     >>> + */
>     >>> +
>     >>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
>     >>> +#define __LINUX_DMA_FENCE_CHAIN_H
>     >>> +
>     >>> +#include <linux/dma-fence.h>
>     >>> +#include <linux/irq_work.h>
>     >>> +
>     >>> +/**
>     >>> + * struct dma_fence_chain - fence to represent an node of a
>     fence chain
>     >>> + * @base: fence base class
>     >>> + * @lock: spinlock for fence handling
>     >>> + * @prev: previous fence of the chain
>     >>> + * @prev_seqno: original previous seqno before garbage collection
>     >>> + * @fence: encapsulated fence
>     >>> + * @cb: callback structure for signaling
>     >>> + * @work: irq work item for signaling
>     >>> + */
>     >>> +struct dma_fence_chain {
>     >>> +    struct dma_fence base;
>     >>> +    spinlock_t lock;
>     >>> +    struct dma_fence *prev;
>     >>> +    u64 prev_seqno;
>     >>> +    struct dma_fence *fence;
>     >>> +    struct dma_fence_cb cb;
>     >>> +    struct irq_work work;
>     >>> +};
>     >>> +
>     >>> +extern const struct dma_fence_ops dma_fence_chain_ops;
>     >>> +
>     >>> +/**
>     >>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
>     >>> + * @fence: fence to cast to a dma_fence_array
>     >>> + *
>     >>> + * Returns NULL if the fence is not a dma_fence_chain,
>     >>> + * or the dma_fence_chain otherwise.
>     >>> + */
>     >>> +static inline struct dma_fence_chain *
>     >>> +to_dma_fence_chain(struct dma_fence *fence)
>     >>> +{
>     >>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
>     >>> +        return NULL;
>     >>> +
>     >>> +    return container_of(fence, struct dma_fence_chain, base);
>     >>> +}
>     >>> +
>     >>> +/**
>     >>> + * dma_fence_chain_for_each - iterate over all fences in chain
>     >>> + * @iter: current fence
>     >>> + * @head: starting point
>     >>> + *
>     >>> + * Iterate over all fences in the chain. We keep a reference
>     to the
>     >>> current
>     >>> + * fence while inside the loop which must be dropped when
>     breaking out.
>     >>> + */
>     >>> +#define dma_fence_chain_for_each(iter, head)    \
>     >>> +    for (iter = dma_fence_get(head); iter; \
>     >>> +         iter = dma_fence_chain_walk(head))
>     >>> +
>     >>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
>     >>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence,
>     uint64_t
>     >>> seqno);
>     >>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>     >>> +              struct dma_fence *prev,
>     >>> +              struct dma_fence *fence,
>     >>> +              uint64_t seqno);
>     >>> +
>     >>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
>     >>
>
>     _______________________________________________
>     dri-devel mailing list
>     dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     <mailto:dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org>
>     https://lists.freedesktop.org/mailman/listinfo/dri-devel
>


[-- Attachment #1.2: Type: text/html, Size: 35307 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]                   ` <a0b27d87-50f2-56ce-1db7-5a1dc005a798-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-02-15 18:16                     ` Jason Ekstrand
       [not found]                       ` <CAOFGe9611MqmsvdvZS4_vuJjrrUAmjK5-41Z6tpaxTHJsB8CwA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Jason Ekstrand @ 2019-02-15 18:16 UTC (permalink / raw)
  To: Christian König
  Cc: Zhou, David(ChunMing),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Lionel Landwerlin


[-- Attachment #1.1: Type: text/plain, Size: 21714 bytes --]

On Fri, Feb 15, 2019 at 11:51 AM Christian König <
ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

> Am 15.02.19 um 17:49 schrieb Jason Ekstrand:
>
> On Fri, Feb 15, 2019 at 9:52 AM Lionel Landwerlin via dri-devel <
> dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> wrote:
>
>> On 15/02/2019 14:32, Koenig, Christian wrote:
>> > Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
>> >> Hi Christian, David,
>> >>
>> >> For timeline semaphore we need points to be signaled in order.
>> >> I'm struggling to understand how this fence-chain implementation
>> >> preserves ordering of the seqnos.
>> >>
>> >> One of the scenario I can see an issue happening is when you have a
>> >> timeline with points 1 & 2 and userspace submits for 2 different
>> >> engines :
>> >>      - first with let's say a blitter style engine on point 2
>> >>      - then a 3d style engine on point 1
>> > Yeah, and where exactly is the problem?
>> >
>> > Seqno 1 will signal when the 3d style engine finishes work.
>> >
>> > And seqno 2 will signal when both seqno 1 is signaled and the blitter
>> > style engine has finished its work.
>>
>
> That's an interesting interpretation of the spec.  I think it's legal and
> I could see that behavior may be desirable in some ways.
>
>
> Well we actually had this discussion multiple times now, both internally
> as well as on the mailing list. Please also see the previous mails with
> Daniel on this topic.
>

I dug through dri-devel and read everything I could find with a search for
"timeline semaphore".  I didn't find all that much but this did come up once.


> My initial suggestion was actually to do exactly what Lionel suggested as
> well.
>
> And following this I used a rather simple container for the
> implementation, e.g. just a ring buffer indexed by the sequence number. In
> this scenario userspace can specify on syncobj creation time how big the
> window for sequence numbers should be, e.g. in this implementation how big
> the ring buffer would be.
>
> This was rejected by our guys who actually wrote a good part of the Vulkan
> specification. Daniel then has gone into the same direction during the
> public discussion.
>

I agree with whoever said that specifying a ringbuffer size is
unacceptable.  I'm not really sure how that's relevant though.  Is a
ringbuffer required to implement the behavior that is being suggested
here?  Genuine question; I'm trying to get back up to speed.


> [SNIP]
>
> I think what Christian is suggesting is a valid interpretation of the spec
> though it is rather unconventional.  The Vulkan spec, as it stands today,
> requires that the application ensure that at the time of signaling, the
> timeline semaphore value increases.  This means that all of the above
> possible cases are technically illegal in Vulkan and so it doesn't really
> matter what we do as long as we don't do anything especially stupid.
>
>
> And exactly that's the point. When an application does something stupid
> with its own submissions then this is not much of a problem.
>
> But this interface is meant to be made for communication between
> processes, and here we want to be sure that nobody can do anything stupid.
>
> My understanding of how this works on Windows is that a wait operation on
> 3 is a wait until x >= 3 where x is a 64-bit value and a signal operation
> is simply a write to x.  This means that, in the above cases, waits on 1
> will be triggered immediately when 2 is written but waits on 2 may or may
> not happen at all depending on whether the GPU write which overwrites x to
> 1 or the CPU (or potentially GPU in a different context) read gets there
> first such that the reader observes 2.  If you mess this up and something
> isn't signaled, that's your fault.
>
>
> Yeah and I think that this is actually not a good idea at all.
> Implementing it like this ultimately means that you can only use polling on
> the number.
>

Yeah, there are problems with it.  I'm just putting it out there for
reference and because it's what developers expect regardless of whether
that's a good thing or not.

 4. If you do get into a sticky situation, you can unblock an entire
> timeline by using the CPU signal ioctl to set it to a high value.
>
>
> Well I think that this could be problematic as well. Keep in mind that
> main use case for this is sharing timelines between processes.
>
> In other words you don't want applications to be able to mess with it too
> much.
>

Cross-process is exactly why you want it.  Suppose you're a compositor and
you have a timeline shared with another application and you've submitted
work which waits on it.  Then you get a notification somehow (SIGHUP?) that
the client has died leaving you hanging.  What do you do?  You take the
semaphore that's shared with you and the client and whack it to UINT64_MAX
to unblock yourself.  Of course, this can be abused and that's always the
risk you take with timelines.


>
> Of all these reasons, I think 1 and 2 carry the most weight.  2, in
> particular, is interesting if we one day want to implement the same
> behavior with a simple 64-bit value like Windows does.  Imagine, for
> instance, a scenario where the GPU is doing its own scheduling or command
> buffers are submitted ahead of the signal operation being available and
> told to just sit on the GPU until they see x >= 3.  (Yes, there are issues
> here with residency, contention, etc.  I'm asking you to use your
> imagination.)  Assuming you can do 64-bit atomics (there are apparently
> issues here with PCIe that make things sticky), the behavior I'm suggesting
> is completely implementable in that way whereas the behavior Christian is
> suggesting is only implementable if you're maintaining a CPU-side list of
> fences.  I don't think we want to paint ourselves into that corner.
>
>
> Actually we already had such an implementation with radeon. And I can only
> say that it was a totally PAIN IN THE A* to maintain.
>
> This is one of the reason why we are not using hardware semaphores any
> more with amdgpu.
>

Yeah, there are serious issues with just using a 64-bit integer and, to be
honest, I haven't thought (or fought, for that matter) through them all.

--Jason



> Regards,
> Christian.
>
>
> --Jason
>
>
>> >
>> > Regards,
>> > Christian.
>> >
>> >> -Lionel
>> >>
>> >> On 07/12/2018 09:55, Chunming Zhou wrote:
>> >>> From: Christian König <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> >>>
>> >>> Lockless container implementation similar to a dma_fence_array, but
>> with
>> >>> only two elements per node and automatic garbage collection.
>> >>>
>> >>> v2: properly document dma_fence_chain_for_each, add
>> >>> dma_fence_chain_find_seqno,
>> >>>       drop prev reference during garbage collection if it's not a
>> >>> chain fence.
>> >>> v3: use head and iterator for dma_fence_chain_for_each
>> >>> v4: fix reference count in dma_fence_chain_enable_signaling
>> >>>
>> >>> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>> >>> ---
>> >>>    drivers/dma-buf/Makefile          |   3 +-
>> >>>    drivers/dma-buf/dma-fence-chain.c | 241
>> ++++++++++++++++++++++++++++++
>> >>>    include/linux/dma-fence-chain.h   |  81 ++++++++++
>> >>>    3 files changed, 324 insertions(+), 1 deletion(-)
>> >>>    create mode 100644 drivers/dma-buf/dma-fence-chain.c
>> >>>    create mode 100644 include/linux/dma-fence-chain.h
>> >>>
>> >>> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
>> >>> index 0913a6ccab5a..1f006e083eb9 100644
>> >>> --- a/drivers/dma-buf/Makefile
>> >>> +++ b/drivers/dma-buf/Makefile
>> >>> @@ -1,4 +1,5 @@
>> >>> -obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
>> >>> seqno-fence.o
>> >>> +obj-y := dma-buf.o dma-fence.o dma-fence-array.o dma-fence-chain.o \
>> >>> +     reservation.o seqno-fence.o
>> >>>    obj-$(CONFIG_SYNC_FILE)        += sync_file.o
>> >>>    obj-$(CONFIG_SW_SYNC)        += sw_sync.o sync_debug.o
>> >>>    obj-$(CONFIG_UDMABUF)        += udmabuf.o
>> >>> diff --git a/drivers/dma-buf/dma-fence-chain.c
>> >>> b/drivers/dma-buf/dma-fence-chain.c
>> >>> new file mode 100644
>> >>> index 000000000000..0c5e3c902fa0
>> >>> --- /dev/null
>> >>> +++ b/drivers/dma-buf/dma-fence-chain.c
>> >>> @@ -0,0 +1,241 @@
>> >>> +/*
>> >>> + * fence-chain: chain fences together in a timeline
>> >>> + *
>> >>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>> >>> + * Authors:
>> >>> + *    Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>> >>> + *
>> >>> + * This program is free software; you can redistribute it and/or
>> >>> modify it
>> >>> + * under the terms of the GNU General Public License version 2 as
>> >>> published by
>> >>> + * the Free Software Foundation.
>> >>> + *
>> >>> + * This program is distributed in the hope that it will be useful,
>> >>> but WITHOUT
>> >>> + * ANY WARRANTY; without even the implied warranty of
>> >>> MERCHANTABILITY or
>> >>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>> >>> License for
>> >>> + * more details.
>> >>> + */
>> >>> +
>> >>> +#include <linux/dma-fence-chain.h>
>> >>> +
>> >>> +static bool dma_fence_chain_enable_signaling(struct dma_fence
>> *fence);
>> >>> +
>> >>> +/**
>> >>> + * dma_fence_chain_get_prev - use RCU to get a reference to the
>> >>> previous fence
>> >>> + * @chain: chain node to get the previous node from
>> >>> + *
>> >>> + * Use dma_fence_get_rcu_safe to get a reference to the previous
>> >>> fence of the
>> >>> + * chain node.
>> >>> + */
>> >>> +static struct dma_fence *dma_fence_chain_get_prev(struct
>> >>> dma_fence_chain *chain)
>> >>> +{
>> >>> +    struct dma_fence *prev;
>> >>> +
>> >>> +    rcu_read_lock();
>> >>> +    prev = dma_fence_get_rcu_safe(&chain->prev);
>> >>> +    rcu_read_unlock();
>> >>> +    return prev;
>> >>> +}
>> >>> +
>> >>> +/**
>> >>> + * dma_fence_chain_walk - chain walking function
>> >>> + * @fence: current chain node
>> >>> + *
>> >>> + * Walk the chain to the next node. Returns the next fence or NULL
>> >>> if we are at
>> >>> + * the end of the chain. Garbage collects chain nodes which are
>> already
>> >>> + * signaled.
>> >>> + */
>> >>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence)
>> >>> +{
>> >>> +    struct dma_fence_chain *chain, *prev_chain;
>> >>> +    struct dma_fence *prev, *replacement, *tmp;
>> >>> +
>> >>> +    chain = to_dma_fence_chain(fence);
>> >>> +    if (!chain) {
>> >>> +        dma_fence_put(fence);
>> >>> +        return NULL;
>> >>> +    }
>> >>> +
>> >>> +    while ((prev = dma_fence_chain_get_prev(chain))) {
>> >>> +
>> >>> +        prev_chain = to_dma_fence_chain(prev);
>> >>> +        if (prev_chain) {
>> >>> +            if (!dma_fence_is_signaled(prev_chain->fence))
>> >>> +                break;
>> >>> +
>> >>> +            replacement = dma_fence_chain_get_prev(prev_chain);
>> >>> +        } else {
>> >>> +            if (!dma_fence_is_signaled(prev))
>> >>> +                break;
>> >>> +
>> >>> +            replacement = NULL;
>> >>> +        }
>> >>> +
>> >>> +        tmp = cmpxchg(&chain->prev, prev, replacement);
>> >>> +        if (tmp == prev)
>> >>> +            dma_fence_put(tmp);
>> >>> +        else
>> >>> +            dma_fence_put(replacement);
>> >>> +        dma_fence_put(prev);
>> >>> +    }
>> >>> +
>> >>> +    dma_fence_put(fence);
>> >>> +    return prev;
>> >>> +}
>> >>> +EXPORT_SYMBOL(dma_fence_chain_walk);
>> >>> +
>> >>> +/**
>> >>> + * dma_fence_chain_find_seqno - find fence chain node by seqno
>> >>> + * @pfence: pointer to the chain node where to start
>> >>> + * @seqno: the sequence number to search for
>> >>> + *
>> >>> + * Advance the fence pointer to the chain node which will signal
>> >>> this sequence
>> >>> + * number. If no sequence number is provided then this is a no-op.
>> >>> + *
>> >>> + * Returns EINVAL if the fence is not a chain node or the sequence
>> >>> number has
>> >>> + * not yet advanced far enough.
>> >>> + */
>> >>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>> >>> seqno)
>> >>> +{
>> >>> +    struct dma_fence_chain *chain;
>> >>> +
>> >>> +    if (!seqno)
>> >>> +        return 0;
>> >>> +
>> >>> +    chain = to_dma_fence_chain(*pfence);
>> >>> +    if (!chain || chain->base.seqno < seqno)
>> >>> +        return -EINVAL;
>> >>> +
>> >>> +    dma_fence_chain_for_each(*pfence, &chain->base) {
>> >>> +        if ((*pfence)->context != chain->base.context ||
>> >>> +            to_dma_fence_chain(*pfence)->prev_seqno < seqno)
>> >>> +            break;
>> >>> +    }
>> >>> +    dma_fence_put(&chain->base);
>> >>> +
>> >>> +    return 0;
>> >>> +}
>> >>> +EXPORT_SYMBOL(dma_fence_chain_find_seqno);
>> >>> +
>> >>> +static const char *dma_fence_chain_get_driver_name(struct dma_fence
>> >>> *fence)
>> >>> +{
>> >>> +        return "dma_fence_chain";
>> >>> +}
>> >>> +
>> >>> +static const char *dma_fence_chain_get_timeline_name(struct
>> >>> dma_fence *fence)
>> >>> +{
>> >>> +        return "unbound";
>> >>> +}
>> >>> +
>> >>> +static void dma_fence_chain_irq_work(struct irq_work *work)
>> >>> +{
>> >>> +    struct dma_fence_chain *chain;
>> >>> +
>> >>> +    chain = container_of(work, typeof(*chain), work);
>> >>> +
>> >>> +    /* Try to rearm the callback */
>> >>> +    if (!dma_fence_chain_enable_signaling(&chain->base))
>> >>> +        /* Ok, we are done. No more unsignaled fences left */
>> >>> +        dma_fence_signal(&chain->base);
>> >>> +    dma_fence_put(&chain->base);
>> >>> +}
>> >>> +
>> >>> +static void dma_fence_chain_cb(struct dma_fence *f, struct
>> >>> dma_fence_cb *cb)
>> >>> +{
>> >>> +    struct dma_fence_chain *chain;
>> >>> +
>> >>> +    chain = container_of(cb, typeof(*chain), cb);
>> >>> +    irq_work_queue(&chain->work);
>> >>> +    dma_fence_put(f);
>> >>> +}
>> >>> +
>> >>> +static bool dma_fence_chain_enable_signaling(struct dma_fence *fence)
>> >>> +{
>> >>> +    struct dma_fence_chain *head = to_dma_fence_chain(fence);
>> >>> +
>> >>> +    dma_fence_get(&head->base);
>> >>> +    dma_fence_chain_for_each(fence, &head->base) {
>> >>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>> >>> +        struct dma_fence *f = chain ? chain->fence : fence;
>> >>> +
>> >>> +        dma_fence_get(f);
>> >>> +        if (!dma_fence_add_callback(f, &head->cb,
>> >>> dma_fence_chain_cb)) {
>> >>> +            dma_fence_put(fence);
>> >>> +            return true;
>> >>> +        }
>> >>> +        dma_fence_put(f);
>> >>> +    }
>> >>> +    dma_fence_put(&head->base);
>> >>> +    return false;
>> >>> +}
>> >>> +
>> >>> +static bool dma_fence_chain_signaled(struct dma_fence *fence)
>> >>> +{
>> >>> +    dma_fence_chain_for_each(fence, fence) {
>> >>> +        struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>> >>> +        struct dma_fence *f = chain ? chain->fence : fence;
>> >>> +
>> >>> +        if (!dma_fence_is_signaled(f)) {
>> >>> +            dma_fence_put(fence);
>> >>> +            return false;
>> >>> +        }
>> >>> +    }
>> >>> +
>> >>> +    return true;
>> >>> +}
>> >>> +
>> >>> +static void dma_fence_chain_release(struct dma_fence *fence)
>> >>> +{
>> >>> +    struct dma_fence_chain *chain = to_dma_fence_chain(fence);
>> >>> +
>> >>> +    dma_fence_put(chain->prev);
>> >>> +    dma_fence_put(chain->fence);
>> >>> +    dma_fence_free(fence);
>> >>> +}
>> >>> +
>> >>> +const struct dma_fence_ops dma_fence_chain_ops = {
>> >>> +    .get_driver_name = dma_fence_chain_get_driver_name,
>> >>> +    .get_timeline_name = dma_fence_chain_get_timeline_name,
>> >>> +    .enable_signaling = dma_fence_chain_enable_signaling,
>> >>> +    .signaled = dma_fence_chain_signaled,
>> >>> +    .release = dma_fence_chain_release,
>> >>> +};
>> >>> +EXPORT_SYMBOL(dma_fence_chain_ops);
>> >>> +
>> >>> +/**
>> >>> + * dma_fence_chain_init - initialize a fence chain
>> >>> + * @chain: the chain node to initialize
>> >>> + * @prev: the previous fence
>> >>> + * @fence: the current fence
>> >>> + *
>> >>> + * Initialize a new chain node and either start a new chain or add
>> >>> the node to
>> >>> + * the existing chain of the previous fence.
>> >>> + */
>> >>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>> >>> +              struct dma_fence *prev,
>> >>> +              struct dma_fence *fence,
>> >>> +              uint64_t seqno)
>> >>> +{
>> >>> +    struct dma_fence_chain *prev_chain = to_dma_fence_chain(prev);
>> >>> +    uint64_t context;
>> >>> +
>> >>> +    spin_lock_init(&chain->lock);
>> >>> +    chain->prev = prev;
>> >>> +    chain->fence = fence;
>> >>> +    chain->prev_seqno = 0;
>> >>> +    init_irq_work(&chain->work, dma_fence_chain_irq_work);
>> >>> +
>> >>> +    /* Try to reuse the context of the previous chain node. */
>> >>> +    if (prev_chain && __dma_fence_is_later(seqno, prev->seqno)) {
>> >>> +        context = prev->context;
>> >>> +        chain->prev_seqno = prev->seqno;
>> >>> +    } else {
>> >>> +        context = dma_fence_context_alloc(1);
>> >>> +        /* Make sure that we always have a valid sequence number. */
>> >>> +        if (prev_chain)
>> >>> +            seqno = max(prev->seqno, seqno);
>> >>> +    }
>> >>> +
>> >>> +    dma_fence_init(&chain->base, &dma_fence_chain_ops,
>> >>> +               &chain->lock, context, seqno);
>> >>> +}
>> >>> +EXPORT_SYMBOL(dma_fence_chain_init);
>> >>> diff --git a/include/linux/dma-fence-chain.h
>> >>> b/include/linux/dma-fence-chain.h
>> >>> new file mode 100644
>> >>> index 000000000000..a5c2e8c6915c
>> >>> --- /dev/null
>> >>> +++ b/include/linux/dma-fence-chain.h
>> >>> @@ -0,0 +1,81 @@
>> >>> +/*
>> >>> + * fence-chain: chain fences together in a timeline
>> >>> + *
>> >>> + * Copyright (C) 2018 Advanced Micro Devices, Inc.
>> >>> + * Authors:
>> >>> + *    Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>> >>> + *
>> >>> + * This program is free software; you can redistribute it and/or
>> >>> modify it
>> >>> + * under the terms of the GNU General Public License version 2 as
>> >>> published by
>> >>> + * the Free Software Foundation.
>> >>> + *
>> >>> + * This program is distributed in the hope that it will be useful,
>> >>> but WITHOUT
>> >>> + * ANY WARRANTY; without even the implied warranty of
>> >>> MERCHANTABILITY or
>> >>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>> >>> License for
>> >>> + * more details.
>> >>> + */
>> >>> +
>> >>> +#ifndef __LINUX_DMA_FENCE_CHAIN_H
>> >>> +#define __LINUX_DMA_FENCE_CHAIN_H
>> >>> +
>> >>> +#include <linux/dma-fence.h>
>> >>> +#include <linux/irq_work.h>
>> >>> +
>> >>> +/**
>> >>> + * struct dma_fence_chain - fence to represent an node of a fence
>> chain
>> >>> + * @base: fence base class
>> >>> + * @lock: spinlock for fence handling
>> >>> + * @prev: previous fence of the chain
>> >>> + * @prev_seqno: original previous seqno before garbage collection
>> >>> + * @fence: encapsulated fence
>> >>> + * @cb: callback structure for signaling
>> >>> + * @work: irq work item for signaling
>> >>> + */
>> >>> +struct dma_fence_chain {
>> >>> +    struct dma_fence base;
>> >>> +    spinlock_t lock;
>> >>> +    struct dma_fence *prev;
>> >>> +    u64 prev_seqno;
>> >>> +    struct dma_fence *fence;
>> >>> +    struct dma_fence_cb cb;
>> >>> +    struct irq_work work;
>> >>> +};
>> >>> +
>> >>> +extern const struct dma_fence_ops dma_fence_chain_ops;
>> >>> +
>> >>> +/**
>> >>> + * to_dma_fence_chain - cast a fence to a dma_fence_chain
>> >>> + * @fence: fence to cast to a dma_fence_array
>> >>> + *
>> >>> + * Returns NULL if the fence is not a dma_fence_chain,
>> >>> + * or the dma_fence_chain otherwise.
>> >>> + */
>> >>> +static inline struct dma_fence_chain *
>> >>> +to_dma_fence_chain(struct dma_fence *fence)
>> >>> +{
>> >>> +    if (!fence || fence->ops != &dma_fence_chain_ops)
>> >>> +        return NULL;
>> >>> +
>> >>> +    return container_of(fence, struct dma_fence_chain, base);
>> >>> +}
>> >>> +
>> >>> +/**
>> >>> + * dma_fence_chain_for_each - iterate over all fences in chain
>> >>> + * @iter: current fence
>> >>> + * @head: starting point
>> >>> + *
>> >>> + * Iterate over all fences in the chain. We keep a reference to the
>> >>> current
>> >>> + * fence while inside the loop which must be dropped when breaking
>> out.
>> >>> + */
>> >>> +#define dma_fence_chain_for_each(iter, head)    \
>> >>> +    for (iter = dma_fence_get(head); iter; \
>> >>> +         iter = dma_fence_chain_walk(head))
>> >>> +
>> >>> +struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
>> >>> +int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t
>> >>> seqno);
>> >>> +void dma_fence_chain_init(struct dma_fence_chain *chain,
>> >>> +              struct dma_fence *prev,
>> >>> +              struct dma_fence *fence,
>> >>> +              uint64_t seqno);
>> >>> +
>> >>> +#endif /* __LINUX_DMA_FENCE_CHAIN_H */
>> >>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 37612 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]                       ` <CAOFGe9611MqmsvdvZS4_vuJjrrUAmjK5-41Z6tpaxTHJsB8CwA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-02-15 18:33                         ` Koenig, Christian
       [not found]                           ` <f933f9ec-6e69-f9df-f12f-5f1844a2ad37-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-15 18:33 UTC (permalink / raw)
  To: Jason Ekstrand
  Cc: Zhou, David(ChunMing),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Lionel Landwerlin


[-- Attachment #1.1: Type: text/plain, Size: 4256 bytes --]

Am 15.02.19 um 19:16 schrieb Jason Ekstrand:
On Fri, Feb 15, 2019 at 11:51 AM Christian König <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
Am 15.02.19 um 17:49 schrieb Jason Ekstrand:
On Fri, Feb 15, 2019 at 9:52 AM Lionel Landwerlin via dri-devel <dri-devel@lists.freedesktop.org<mailto:dri-devel@lists.freedesktop.org>> wrote:
On 15/02/2019 14:32, Koenig, Christian wrote:
> Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
>> Hi Christian, David,
>>
>> For timeline semaphore we need points to be signaled in order.
>> I'm struggling to understand how this fence-chain implementation
>> preserves ordering of the seqnos.
>>
>> One of the scenario I can see an issue happening is when you have a
>> timeline with points 1 & 2 and userspace submits for 2 different
>> engines :
>>      - first with let's say a blitter style engine on point 2
>>      - then a 3d style engine on point 1
> Yeah, and where exactly is the problem?
>
> Seqno 1 will signal when the 3d style engine finishes work.
>
> And seqno 2 will signal when both seqno 1 is signaled and the blitter
> style engine has finished its work.

That's an interesting interpretation of the spec.  I think it's legal and I could see that behavior may be desirable in some ways.

Well we actually had this discussion multiple times now, both internally as well as on the mailing list. Please also see the previous mails with Daniel on this topic.

I dug through dri-devel and read everything I could find with a search for "timeline semaphore".  I didn't find all that much but this did come up once.

Need to dig through my mails as well, that was back in November/December last year.


My initial suggestion was actually to do exactly what Lionel suggested as well.

And following this I used a rather simple container for the implementation, e.g. just a ring buffer indexed by the sequence number. In this scenario userspace can specify on syncobj creation time how big the window for sequence numbers should be, e.g. in this implementation how big the ring buffer would be.

This was rejected by our guys who actually wrote a good part of the Vulkan specification. Daniel then has gone into the same direction during the public discussion.

I agree with whoever said that specifying a ringbuffer size is unacceptable.  I'm not really sure how that's relevant though.  Is a ringbuffer required to implement the behavior that is being suggested here?  Genuine question; I'm trying to get back up to speed.

Using a ring buffer was just an example how we could do it if we follow my and Lionel's suggestion.

Key point is that we could simplify the implementation massively if sequence numbers don't need to depend on each other.

In other words we just see the syncobj as container where fences are added and retrieved from instead of something actively involved in the signaling.

Main reason we didn't do it this way is because the AMD Vulkan team has rejected this approach.

In addition to that, chaining sequence numbers together is really the more defensive approach, e.g. it is less likely that applications can shoot themselves in the foot.


 4. If you do get into a sticky situation, you can unblock an entire timeline by using the CPU signal ioctl to set it to a high value.

Well I think that this could be problematic as well. Keep in mind that main use case for this is sharing timelines between processes.

In other words you don't want applications to be able to mess with it too much.

Cross-process is exactly why you want it.  Suppose you're a compositor and you have a timeline shared with another application and you've submitted work which waits on it.  Then you get a notification somehow (SIGHUP?) that the client has died leaving you hanging.  What do you do?  You take the semaphore that's shared with you and the client and whack it to UINT64_MAX to unblock yourself.  Of course, this can be abused and that's always the risk you take with timelines.

My last status is that basically everybody agrees now that wait before signal in the kernel is forbidden.

So when you get a SIGHUP because your client is dead you just kill your thread waiting on it.

Regards,
Christian.

[-- Attachment #1.2: Type: text/html, Size: 6819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 02/11] dma-buf: add new dma_fence_chain container v4
       [not found]                           ` <f933f9ec-6e69-f9df-f12f-5f1844a2ad37-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-15 19:11                             ` Jason Ekstrand
  0 siblings, 0 replies; 42+ messages in thread
From: Jason Ekstrand @ 2019-02-15 19:11 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Zhou, David(ChunMing),
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Lionel Landwerlin


[-- Attachment #1.1: Type: text/plain, Size: 6325 bytes --]

On Fri, Feb 15, 2019 at 12:33 PM Koenig, Christian <Christian.Koenig-urvtwAKJhsc@public.gmane.org>
wrote:

> Am 15.02.19 um 19:16 schrieb Jason Ekstrand:
>
> On Fri, Feb 15, 2019 at 11:51 AM Christian König <
> ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>
>> Am 15.02.19 um 17:49 schrieb Jason Ekstrand:
>>
>> On Fri, Feb 15, 2019 at 9:52 AM Lionel Landwerlin via dri-devel <
>> dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> wrote:
>>
>>> On 15/02/2019 14:32, Koenig, Christian wrote:
>>> > Am 15.02.19 um 15:23 schrieb Lionel Landwerlin:
>>> >> Hi Christian, David,
>>> >>
>>> >> For timeline semaphore we need points to be signaled in order.
>>> >> I'm struggling to understand how this fence-chain implementation
>>> >> preserves ordering of the seqnos.
>>> >>
>>> >> One of the scenario I can see an issue happening is when you have a
>>> >> timeline with points 1 & 2 and userspace submits for 2 different
>>> >> engines :
>>> >>      - first with let's say a blitter style engine on point 2
>>> >>      - then a 3d style engine on point 1
>>> > Yeah, and where exactly is the problem?
>>> >
>>> > Seqno 1 will signal when the 3d style engine finishes work.
>>> >
>>> > And seqno 2 will signal when both seqno 1 is signaled and the blitter
>>> > style engine has finished its work.
>>>
>>
>> That's an interesting interpretation of the spec.  I think it's legal and
>> I could see that behavior may be desirable in some ways.
>>
>>
>> Well we actually had this discussion multiple times now, both internally
>> as well as on the mailing list. Please also see the previous mails with
>> Daniel on this topic.
>>
>
> I dug through dri-devel and read everything I could find with a search for
> "timeline semaphore".  I didn't find all that much but this did come up once.
>
>
> Need to dig through my mails as well, that was back in November/December
> last year.
>
>
>
>> My initial suggestion was actually to do exactly what Lionel suggested as
>> well.
>>
>> And following this I used a rather simple container for the
>> implementation, e.g. just a ring buffer indexed by the sequence number. In
>> this scenario userspace can specify on syncobj creation time how big the
>> window for sequence numbers should be, e.g. in this implementation how big
>> the ring buffer would be.
>>
>> This was rejected by our guys who actually wrote a good part of the
>> Vulkan specification. Daniel then has gone into the same direction during
>> the public discussion.
>>
>
> I agree with whoever said that specifying a ringbuffer size is
> unacceptable.  I'm not really sure how that's relevant though.  Is a
> ringbuffer required to implement the behavior that is being suggested
> here?  Genuine question; I'm trying to get back up to speed.
>
>
> Using a ring buffer was just an example how we could do it if we follow my
> and Lionel's suggestion.
>
> Key point is that we could simplify the implementation massively if
> sequence numbers don't need to depend on each other.
>
> In other words we just see the syncobj as container where fences are added
> and retrieved from instead of something actively involved in the signaling.
>

In principle, I think this is a reasonable argument.  Having it involved in
signalling doesn't seem terrible to me but it would mean that a driver
wouldn't be able to detect that the fence it's waiting on actually belongs
to itself and optimize things.


> Main reason we didn't do it this way is because the AMD Vulkan team has
> rejected this approach.
>

Clearly, there's not quite as much agreement as I'd thought there was.  Oh,
well, that's why we have these discussions.


> Additional to that chaining sequence numbers together is really the more
> defensive approach, e.g. it is less likely that applications can shoot
> themselves in the foot.
>

Yeah, I can see how the "everything prior to n must be signalled" could be
safer.  I think both wait-any and wait-all have their ups and downs.  It
just took me by surprise.


>
>  4. If you do get into a sticky situation, you can unblock an entire
>> timeline by using the CPU signal ioctl to set it to a high value.
>>
>>
>> Well I think that this could be problematic as well. Keep in mind that
>> main use case for this is sharing timelines between processes.
>>
>> In other words you don't want applications to be able to mess with it too
>> much.
>>
>
> Cross-process is exactly why you want it.  Suppose you're a compositor and
> you have a timeline shared with another application and you've submitted
> work which waits on it.  Then you get a notification somehow (SIGHUP?) that
> the client has died leaving you hanging.  What do you do?  You take the
> semaphore that's shared with you and the client and whack it to UINT64_MAX
> to unblock yourself.  Of course, this can be abused and that's always the
> risk you take with timelines.
>
>
> My last status is that basically everybody agrees now that wait before
> signal in the kernel is forbidden.
>

Agreed.  I'm not saying that wait before signal in the kernel should be a
thing.  I think we're all agreed that wait-before-signal with the current
GEM infrastructure is utter insanity.

However, timeline syncobjs are both a kernel wait mechanism (only for time
points that already have a known-to-signal dma_fence) and a userspace wait
mechanism (which can wait for things which haven't materialized yet).  The
whacking to UINT64_MAX would be to unblock waiting userspace threads as you
mentioned below.

That said, in the case that I suggested here, if the client process died,
got kicked off the GPU, or whatever, then the kernel has likely declared
its context a loss and signalled all dma_fences associated with it.  If
this is true, then whacking it to UINT64_MAX still works to unblock the
timeline because there would be nothing else pending to signal time
points.  Worst case, the process crashed and left valid GPU work pending in
the kernel and the compositor ends up waiting a little while longer for
said work to complete.

Ok, I think I'm reasonably convinced that the wait-all behaviour implied by
the chaining approach, while unexpected, isn't harmful.

--Jason

[-- Attachment #1.2: Type: text/html, Size: 9258 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4
       [not found]     ` <20181207095601.2058-6-david1.zhou-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-15 19:31       ` Lionel Landwerlin via amd-gfx
       [not found]         ` <157f8231-57e2-0492-de5d-f9ba4761c4c9-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Lionel Landwerlin via amd-gfx @ 2019-02-15 19:31 UTC (permalink / raw)
  To: Chunming Zhou, Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Dave Airlie, Daniel Rakos, Jason Ekstrand

On 07/12/2018 09:55, Chunming Zhou wrote:
> user mode can query timeline payload.
> v2: check return value of copy_to_user
> v3: handle querying entry by entry
> v4: rebase on new chain container, simplify interface
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
> Cc: Dave Airlie <airlied@redhat.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/drm_internal.h |  2 ++
>   drivers/gpu/drm/drm_ioctl.c    |  2 ++
>   drivers/gpu/drm/drm_syncobj.c  | 43 ++++++++++++++++++++++++++++++++++
>   include/uapi/drm/drm.h         | 10 ++++++++
>   4 files changed, 57 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 18b41e10195c..dab4d5936441 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -184,6 +184,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			     struct drm_file *file_private);
> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
> +			    struct drm_file *file_private);
>   
>   /* drm_framebuffer.c */
>   void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index a9a17ed35cc4..7578ef6dc1d1 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -681,6 +681,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
>   	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 348079bb0965..f97fa00ca1d0 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1061,3 +1061,46 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   
>   	return ret;
>   }
> +
> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
> +			    struct drm_file *file_private)
> +{
> +	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj **syncobjs;
> +	uint64_t __user *points = u64_to_user_ptr(args->points);
> +	uint32_t i;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -ENODEV;
> +
> +	if (args->pad != 0)
> +		return -EINVAL;
> +
> +	if (args->count_handles == 0)
> +		return -EINVAL;
> +
> +	ret = drm_syncobj_array_find(file_private,
> +				     u64_to_user_ptr(args->handles),
> +				     args->count_handles,
> +				     &syncobjs);
> +	if (ret < 0)
> +		return ret;
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		struct dma_fence_chain *chain;
> +		struct dma_fence *fence;
> +		uint64_t point;
> +
> +		fence = drm_syncobj_fence_get(syncobjs[i]);
> +		chain = to_dma_fence_chain(fence);
> +		point = chain ? fence->seqno : 0;


Sorry, I don't want to sound annoying, but it looks like this could 
report values going backward.

Anything that adds a point X to a timeline that has already reached 
value Y, with X < Y, would trigger that.

Either through the submission or userspace signaling or importing 
another syncpoint's fence.


-Lionel


> +		ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
> +		ret = ret ? -EFAULT : 0;
> +		if (ret)
> +			break;
> +	}
> +	drm_syncobj_array_free(syncobjs, args->count_handles);
> +
> +	return ret;
> +}
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 0092111d002c..b2c36f2b2599 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -767,6 +767,14 @@ struct drm_syncobj_array {
>   	__u32 pad;
>   };
>   
> +struct drm_syncobj_timeline_array {
> +	__u64 handles;
> +	__u64 points;
> +	__u32 count_handles;
> +	__u32 pad;
> +};
> +
> +
>   /* Query current scanout sequence number */
>   struct drm_crtc_get_sequence {
>   	__u32 crtc_id;		/* requested crtc_id */
> @@ -924,6 +932,8 @@ extern "C" {
>   #define DRM_IOCTL_MODE_REVOKE_LEASE	DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
>   
>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
> +#define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
> +
>   /**
>    * Device specific ioctls should only be in their respective headers
>    * The device specific ioctl range is from 0x40 to 0x9f.


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4
       [not found]         ` <157f8231-57e2-0492-de5d-f9ba4761c4c9-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2019-02-16 19:22           ` Christian König via amd-gfx
       [not found]             ` <a24728a8-5b80-e746-a1f2-6555cd817e99-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Christian König via amd-gfx @ 2019-02-16 19:22 UTC (permalink / raw)
  To: Lionel Landwerlin, Chunming Zhou, Christian.Koenig-5C7GfCeVMHo,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian König, Dave Airlie, Daniel Rakos, Jason Ekstrand

Am 15.02.19 um 20:31 schrieb Lionel Landwerlin via amd-gfx:
> On 07/12/2018 09:55, Chunming Zhou wrote:
>> user mode can query timeline payload.
>> v2: check return value of copy_to_user
>> v3: handle querying entry by entry
>> v4: rebase on new chain container, simplify interface
>>
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>> Cc: Dave Airlie <airlied@redhat.com>
>> Cc: Christian König <christian.koenig@amd.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   drivers/gpu/drm/drm_internal.h |  2 ++
>>   drivers/gpu/drm/drm_ioctl.c    |  2 ++
>>   drivers/gpu/drm/drm_syncobj.c  | 43 ++++++++++++++++++++++++++++++++++
>>   include/uapi/drm/drm.h         | 10 ++++++++
>>   4 files changed, 57 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h 
>> b/drivers/gpu/drm/drm_internal.h
>> index 18b41e10195c..dab4d5936441 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -184,6 +184,8 @@ int drm_syncobj_reset_ioctl(struct drm_device 
>> *dev, void *data,
>>                   struct drm_file *file_private);
>>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>                    struct drm_file *file_private);
>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>> +                struct drm_file *file_private);
>>     /* drm_framebuffer.c */
>>   void drm_framebuffer_print_info(struct drm_printer *p, unsigned int 
>> indent,
>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index a9a17ed35cc4..7578ef6dc1d1 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -681,6 +681,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, 
>> drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
>>       DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, 
>> drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>> b/drivers/gpu/drm/drm_syncobj.c
>> index 348079bb0965..f97fa00ca1d0 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1061,3 +1061,46 @@ drm_syncobj_signal_ioctl(struct drm_device 
>> *dev, void *data,
>>         return ret;
>>   }
>> +
>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>> +                struct drm_file *file_private)
>> +{
>> +    struct drm_syncobj_timeline_array *args = data;
>> +    struct drm_syncobj **syncobjs;
>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>> +    uint32_t i;
>> +    int ret;
>> +
>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +        return -ENODEV;
>> +
>> +    if (args->pad != 0)
>> +        return -EINVAL;
>> +
>> +    if (args->count_handles == 0)
>> +        return -EINVAL;
>> +
>> +    ret = drm_syncobj_array_find(file_private,
>> +                     u64_to_user_ptr(args->handles),
>> +                     args->count_handles,
>> +                     &syncobjs);
>> +    if (ret < 0)
>> +        return ret;
>> +
>> +    for (i = 0; i < args->count_handles; i++) {
>> +        struct dma_fence_chain *chain;
>> +        struct dma_fence *fence;
>> +        uint64_t point;
>> +
>> +        fence = drm_syncobj_fence_get(syncobjs[i]);
>> +        chain = to_dma_fence_chain(fence);
>> +        point = chain ? fence->seqno : 0;
>
>
> Sorry, I don' t want to sound annoying, but this looks like this could 
> report values going backward.

Well, please be as annoying as you can :) But yeah, all that stuff 
has been discussed before as well.

>
> Anything add a point X to a timeline that has reached value Y with X < 
> Y would trigger that.

Yes, that can indeed happen. But adding a timeline point X which is 
before the already added point Y is illegal in the first place :)

So when the application does something stupid and breaks it can just 
keep the pieces.

In the kernel we still do the most defensive thing and sync to 
everything in this case.

I'm just not sure if we should print an error into syslog or just 
continue silently.

Regards,
Christian.

>
> Either through the submission or userspace signaling or importing 
> another syncpoint's fence.
>
>
> -Lionel
>
>
>> +        ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
>> +        ret = ret ? -EFAULT : 0;
>> +        if (ret)
>> +            break;
>> +    }
>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>> +
>> +    return ret;
>> +}
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 0092111d002c..b2c36f2b2599 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -767,6 +767,14 @@ struct drm_syncobj_array {
>>       __u32 pad;
>>   };
>>   +struct drm_syncobj_timeline_array {
>> +    __u64 handles;
>> +    __u64 points;
>> +    __u32 count_handles;
>> +    __u32 pad;
>> +};
>> +
>> +
>>   /* Query current scanout sequence number */
>>   struct drm_crtc_get_sequence {
>>       __u32 crtc_id;        /* requested crtc_id */
>> @@ -924,6 +932,8 @@ extern "C" {
>>   #define DRM_IOCTL_MODE_REVOKE_LEASE    DRM_IOWR(0xC9, struct 
>> drm_mode_revoke_lease)
>>     #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT    DRM_IOWR(0xCA, struct 
>> drm_syncobj_timeline_wait)
>> +#define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct 
>> drm_syncobj_timeline_array)
>> +
>>   /**
>>    * Device specific ioctls should only be in their respective headers
>>    * The device specific ioctl range is from 0x40 to 0x9f.
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4
       [not found]             ` <a24728a8-5b80-e746-a1f2-6555cd817e99-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-02-18  3:10               ` zhoucm1
       [not found]                 ` <eae060f6-6493-ef71-ed9d-52d7dd768b03-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: zhoucm1 @ 2019-02-18  3:10 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, Lionel Landwerlin, Chunming Zhou,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Dave Airlie, Daniel Rakos, Jason Ekstrand



On 2019年02月17日 03:22, Christian König wrote:
> Am 15.02.19 um 20:31 schrieb Lionel Landwerlin via amd-gfx:
>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>> user mode can query timeline payload.
>>> v2: check return value of copy_to_user
>>> v3: handle querying entry by entry
>>> v4: rebase on new chain container, simplify interface
>>>
>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>>> Cc: Dave Airlie <airlied@redhat.com>
>>> Cc: Christian König <christian.koenig@amd.com>
>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>> ---
>>>   drivers/gpu/drm/drm_internal.h |  2 ++
>>>   drivers/gpu/drm/drm_ioctl.c    |  2 ++
>>>   drivers/gpu/drm/drm_syncobj.c  | 43 
>>> ++++++++++++++++++++++++++++++++++
>>>   include/uapi/drm/drm.h         | 10 ++++++++
>>>   4 files changed, 57 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/drm_internal.h 
>>> b/drivers/gpu/drm/drm_internal.h
>>> index 18b41e10195c..dab4d5936441 100644
>>> --- a/drivers/gpu/drm/drm_internal.h
>>> +++ b/drivers/gpu/drm/drm_internal.h
>>> @@ -184,6 +184,8 @@ int drm_syncobj_reset_ioctl(struct drm_device 
>>> *dev, void *data,
>>>                   struct drm_file *file_private);
>>>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>>                    struct drm_file *file_private);
>>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>> +                struct drm_file *file_private);
>>>     /* drm_framebuffer.c */
>>>   void drm_framebuffer_print_info(struct drm_printer *p, unsigned 
>>> int indent,
>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>> index a9a17ed35cc4..7578ef6dc1d1 100644
>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>> @@ -681,6 +681,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, 
>>> drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
>>>       DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, 
>>> drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index 348079bb0965..f97fa00ca1d0 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -1061,3 +1061,46 @@ drm_syncobj_signal_ioctl(struct drm_device 
>>> *dev, void *data,
>>>         return ret;
>>>   }
>>> +
>>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>> +                struct drm_file *file_private)
>>> +{
>>> +    struct drm_syncobj_timeline_array *args = data;
>>> +    struct drm_syncobj **syncobjs;
>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>> +    uint32_t i;
>>> +    int ret;
>>> +
>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>> +        return -ENODEV;
>>> +
>>> +    if (args->pad != 0)
>>> +        return -EINVAL;
>>> +
>>> +    if (args->count_handles == 0)
>>> +        return -EINVAL;
>>> +
>>> +    ret = drm_syncobj_array_find(file_private,
>>> +                     u64_to_user_ptr(args->handles),
>>> +                     args->count_handles,
>>> +                     &syncobjs);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        struct dma_fence_chain *chain;
>>> +        struct dma_fence *fence;
>>> +        uint64_t point;
>>> +
>>> +        fence = drm_syncobj_fence_get(syncobjs[i]);
>>> +        chain = to_dma_fence_chain(fence);
>>> +        point = chain ? fence->seqno : 0;
>>
>>
>> Sorry, I don' t want to sound annoying, but this looks like this 
>> could report values going backward.
>
> Well please be annoying as much as you can :) But yeah all that stuff 
> has been discussed before as well.
>
>>
>> Anything add a point X to a timeline that has reached value Y with X 
>> < Y would trigger that.
>
> Yes, that can indeed happen.
Trigger what? That adding X (X < Y) makes the query return 0?
Why would this happen?
No, syncobj->fence should always be there, and it should be the last 
chain node, if one was ever added.

-David
> But adding a timeline point X which is before the already added point 
> Y is illegal in the first place :)
>
> So when the application does something stupid and breaks it can just 
> keep the pieces.
>
> In the kernel we still do the most defensive thing and sync to 
> everything in this case.
>
> I'm just not sure if we should print an error into syslog or just 
> continue silently.
>
> Regards,
> Christian.
>
>>
>> Either through the submission or userspace signaling or importing 
>> another syncpoint's fence.
>>
>>
>> -Lionel
>>
>>
>>> +        ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
>>> +        ret = ret ? -EFAULT : 0;
>>> +        if (ret)
>>> +            break;
>>> +    }
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +
>>> +    return ret;
>>> +}
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index 0092111d002c..b2c36f2b2599 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -767,6 +767,14 @@ struct drm_syncobj_array {
>>>       __u32 pad;
>>>   };
>>>   +struct drm_syncobj_timeline_array {
>>> +    __u64 handles;
>>> +    __u64 points;
>>> +    __u32 count_handles;
>>> +    __u32 pad;
>>> +};
>>> +
>>> +
>>>   /* Query current scanout sequence number */
>>>   struct drm_crtc_get_sequence {
>>>       __u32 crtc_id;        /* requested crtc_id */
>>> @@ -924,6 +932,8 @@ extern "C" {
>>>   #define DRM_IOCTL_MODE_REVOKE_LEASE    DRM_IOWR(0xC9, struct 
>>> drm_mode_revoke_lease)
>>>     #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT    DRM_IOWR(0xCA, struct 
>>> drm_syncobj_timeline_wait)
>>> +#define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct 
>>> drm_syncobj_timeline_array)
>>> +
>>>   /**
>>>    * Device specific ioctls should only be in their respective headers
>>>    * The device specific ioctl range is from 0x40 to 0x9f.
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4
       [not found]                 ` <eae060f6-6493-ef71-ed9d-52d7dd768b03-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-18  7:28                   ` Koenig, Christian
       [not found]                     ` <4becddef-3bb3-5a66-34d4-95cced896939-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-18  7:28 UTC (permalink / raw)
  To: Zhou, David(ChunMing), Lionel Landwerlin
  Cc: Dave Airlie, Daniel Rakos, Jason Ekstrand

Am 18.02.19 um 04:10 schrieb zhoucm1:
>
>
> On 2019年02月17日 03:22, Christian König wrote:
>> Am 15.02.19 um 20:31 schrieb Lionel Landwerlin via amd-gfx:
>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>> user mode can query timeline payload.
>>>> v2: check return value of copy_to_user
>>>> v3: handle querying entry by entry
>>>> v4: rebase on new chain container, simplify interface
>>>>
>>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>>>> Cc: Dave Airlie <airlied@redhat.com>
>>>> Cc: Christian König <christian.koenig@amd.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> ---
>>>>   drivers/gpu/drm/drm_internal.h |  2 ++
>>>>   drivers/gpu/drm/drm_ioctl.c    |  2 ++
>>>>   drivers/gpu/drm/drm_syncobj.c  | 43 
>>>> ++++++++++++++++++++++++++++++++++
>>>>   include/uapi/drm/drm.h         | 10 ++++++++
>>>>   4 files changed, 57 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_internal.h 
>>>> b/drivers/gpu/drm/drm_internal.h
>>>> index 18b41e10195c..dab4d5936441 100644
>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>> @@ -184,6 +184,8 @@ int drm_syncobj_reset_ioctl(struct drm_device 
>>>> *dev, void *data,
>>>>                   struct drm_file *file_private);
>>>>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>>>                    struct drm_file *file_private);
>>>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>> +                struct drm_file *file_private);
>>>>     /* drm_framebuffer.c */
>>>>   void drm_framebuffer_print_info(struct drm_printer *p, unsigned 
>>>> int indent,
>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>> index a9a17ed35cc4..7578ef6dc1d1 100644
>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>> @@ -681,6 +681,8 @@ static const struct drm_ioctl_desc drm_ioctls[] 
>>>> = {
>>>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, 
>>>> drm_syncobj_signal_ioctl,
>>>>                 DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
>>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>>>       DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, 
>>>> drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
>>>>       DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, 
>>>> drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>> index 348079bb0965..f97fa00ca1d0 100644
>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>> @@ -1061,3 +1061,46 @@ drm_syncobj_signal_ioctl(struct drm_device 
>>>> *dev, void *data,
>>>>         return ret;
>>>>   }
>>>> +
>>>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>> +                struct drm_file *file_private)
>>>> +{
>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>> +    struct drm_syncobj **syncobjs;
>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>> +    uint32_t i;
>>>> +    int ret;
>>>> +
>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>> +        return -ENODEV;
>>>> +
>>>> +    if (args->pad != 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (args->count_handles == 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    ret = drm_syncobj_array_find(file_private,
>>>> +                     u64_to_user_ptr(args->handles),
>>>> +                     args->count_handles,
>>>> +                     &syncobjs);
>>>> +    if (ret < 0)
>>>> +        return ret;
>>>> +
>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>> +        struct dma_fence_chain *chain;
>>>> +        struct dma_fence *fence;
>>>> +        uint64_t point;
>>>> +
>>>> +        fence = drm_syncobj_fence_get(syncobjs[i]);
>>>> +        chain = to_dma_fence_chain(fence);
>>>> +        point = chain ? fence->seqno : 0;
>>>
>>>
>>> Sorry, I don' t want to sound annoying, but this looks like this 
>>> could report values going backward.
>>
>> Well please be annoying as much as you can :) But yeah all that stuff 
>> has been discussed before as well.
>>
>>>
>>> Anything add a point X to a timeline that has reached value Y with X 
>>> < Y would trigger that.
>>
>> Yes, that can indeed happen.
> trigger what? when adding x (x < y), then return 0 when query?
> Why would this happen?
> No, syncobj->fence should always be there and the last chain node, if 
> it is ever added.

Well maybe Lionel should clarify a bit what he means?

I thought he is concerned that the call could return values where X < Y, 
but that doesn't seem to be the case.

Christian.

>
> -David
>> But adding a timeline point X which is before the already added point 
>> Y is illegal in the first place :)
>>
>> So when the application does something stupid and breaks it can just 
>> keep the pieces.
>>
>> In the kernel we still do the most defensive thing and sync to 
>> everything in this case.
>>
>> I'm just not sure if we should print an error into syslog or just 
>> continue silently.
>>
>> Regards,
>> Christian.
>>
>>>
>>> Either through the submission or userspace signaling or importing 
>>> another syncpoint's fence.
>>>
>>>
>>> -Lionel
>>>
>>>
>>>> +        ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
>>>> +        ret = ret ? -EFAULT : 0;
>>>> +        if (ret)
>>>> +            break;
>>>> +    }
>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index 0092111d002c..b2c36f2b2599 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -767,6 +767,14 @@ struct drm_syncobj_array {
>>>>       __u32 pad;
>>>>   };
>>>>   +struct drm_syncobj_timeline_array {
>>>> +    __u64 handles;
>>>> +    __u64 points;
>>>> +    __u32 count_handles;
>>>> +    __u32 pad;
>>>> +};
>>>> +
>>>> +
>>>>   /* Query current scanout sequence number */
>>>>   struct drm_crtc_get_sequence {
>>>>       __u32 crtc_id;        /* requested crtc_id */
>>>> @@ -924,6 +932,8 @@ extern "C" {
>>>>   #define DRM_IOCTL_MODE_REVOKE_LEASE    DRM_IOWR(0xC9, struct 
>>>> drm_mode_revoke_lease)
>>>>     #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct 
>>>> drm_syncobj_timeline_wait)
>>>> +#define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct 
>>>> drm_syncobj_timeline_array)
>>>> +
>>>>   /**
>>>>    * Device specific ioctls should only be in their respective headers
>>>>    * The device specific ioctl range is from 0x40 to 0x9f.
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
  2019-02-15 14:28       ` Lionel Landwerlin via amd-gfx
@ 2019-02-18 10:35         ` zhoucm1
  2019-02-18 11:01           ` Koenig, Christian
  0 siblings, 1 reply; 42+ messages in thread
From: zhoucm1 @ 2019-02-18 10:35 UTC (permalink / raw)
  To: Lionel Landwerlin, Zhou, David(ChunMing),
	Koenig, Christian, dri-devel, amd-gfx


[-- Attachment #1.1: Type: text/plain, Size: 10966 bytes --]

Hi Lionel,

I checked your igt test case,

uint64_t points[5] = { 1, 5, 3, 7, 6 };

which is illegal signal order.

I must admit we should handle it gracefully if signals aren't in order, 
and it shouldn't lead to a deadlock.

Hi Christian,

Can we just ignore when signal point X <= timeline Y? Or just give a 
warning?

Otherwise, unexpected use cases like Lionel's can easily lead to 
deadlock.


-David


On 2019年02月15日 22:28, Lionel Landwerlin wrote:
> Hi David,
>
> Thanks a lot for point me to the tests you've added in IGT.
> While adding a test with that signals fences imported into a timeline
> syncobj out of order, I ran into a deadlock.
> Here is the test :
> https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9
>
> Trying to kill the deadlocked process I got this backtrace :
>
>
> [   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
> [   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
> [syncobj_timelin:2021]
> [   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
> nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
> snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
> rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
> snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
> s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
> libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
> btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
> snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
> mei intel_pch_thermal mac_hid acpi_pad parp
> ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
> zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
> async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
> hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
> ghash_clmulni_intel prime_numbers
> drm_kms_helper aesni_intel syscopyarea sysfillrect
> [   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
> cryptd drm e1000e glue_helper cqhci sdhci wmi video
> [   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
> U            5.0.0-rc5+ #337
> [   60.452882] Hardware name:  /NUC6i7KYB, BIOS
> KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
> [   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
> [   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
> 74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
> 00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
> [   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
> ffffffffffffff13
> [   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
> ffff8f5690fb2f00
> [   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
> ffff8f5690fb2180
> [   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
> ffff8f5690fb2eb0
> [   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
> ffff8f5690fb2f68
> [   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
> ffff8f5655a45fc0
> [   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
> knlGS:0000000000000000
> [   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
> 00000000003606e0
> [   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> 0000000000000000
> [   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> 0000000000000400
> [   60.452916] Call Trace:
> [   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
> [   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
> [   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
> [   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
> [   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
> [   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
> [   60.452992]  ? sw_sync_ioctl+0x347/0x370
> [   60.452994]  do_vfs_ioctl+0xa4/0x640
> [   60.452995]  ? __fput+0x134/0x220
> [   60.452997]  ? do_fcntl+0x1a5/0x650
> [   60.452998]  ksys_ioctl+0x70/0x80
> [   60.452999]  __x64_sys_ioctl+0x16/0x20
> [   60.453002]  do_syscall_64+0x55/0x110
> [   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [   60.453005] RIP: 0033:0x7fdc5b6e45d7
> [   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
> 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
> [   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
> 0000000000000010
> [   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
> 00007fdc5b6e45d7
> [   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
> 0000000000000003
> [   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
> 000000000000001e
> [   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
> 0000563d3959e4d0
> [   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
> 0000000000000000
> [   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
> [syncobj_timelin:2021]
>
>
> -Lionel
>
>
> On 07/12/2018 09:55, Chunming Zhou wrote:
>> we need to import/export timeline point
>>
>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>> ---
>>    drivers/gpu/drm/drm_internal.h |  4 +++
>>    drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>>    drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>>    include/uapi/drm/drm.h         | 10 ++++++
>>    4 files changed, 86 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>> index dab4d5936441..ecbe3d51a702 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>>    				   struct drm_file *file_private);
>>    int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>    				   struct drm_file *file_private);
>> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>> +					 struct drm_file *file_private);
>> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>> +					 struct drm_file *file_private);
>>    int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>    			   struct drm_file *file_private);
>>    int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index 7578ef6dc1d1..6b417e3c3ea5 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
>> +		      drm_syncobj_binary_to_timeline_ioctl,
>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
>> +		      drm_syncobj_timeline_to_binary_ioctl,
>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index 282982e58dbd..cf4daa670252 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>    					&args->handle);
>>    }
>>    
>> +int
>> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>> +				     struct drm_file *file_private)
>> +{
>> +	struct drm_syncobj_transfer *args = data;
>> +	struct drm_syncobj *timeline_syncobj = NULL;
>> +	struct dma_fence *fence;
>> +	struct dma_fence_chain *chain;
>> +	int ret;
>> +
>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +		return -ENODEV;
>> +
>> +	if (args->pad)
>> +		return -EINVAL;
>> +
>> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
>> +	if (!timeline_syncobj) {
>> +		return -ENOENT;
>> +	}
>> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
>> +				     &fence);
>> +	if (ret)
>> +		goto err;
>> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>> +	if (!chain)
>> +		goto err1;
>> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
>> +err1:
>> +	dma_fence_put(fence);
>> +err:
>> +	drm_syncobj_put(timeline_syncobj);
>> +
>> +	return ret;
>> +}
>> +
>> +int
>> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>> +				     struct drm_file *file_private)
>> +{
>> +	struct drm_syncobj_transfer *args = data;
>> +	struct drm_syncobj *binary_syncobj = NULL;
>> +	struct dma_fence *fence;
>> +	int ret;
>> +
>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>> +		return -ENODEV;
>> +
>> +	if (args->pad)
>> +		return -EINVAL;
>> +
>> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
>> +	if (!binary_syncobj)
>> +		return -ENOENT;
>> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
>> +				     args->point, args->flags, &fence);
>> +	if (ret)
>> +		goto err;
>> +	drm_syncobj_replace_fence(binary_syncobj, fence);
>> +	dma_fence_put(fence);
>> +err:
>> +	drm_syncobj_put(binary_syncobj);
>> +
>> +	return ret;
>> +}
>> +
>>    static void syncobj_wait_fence_func(struct dma_fence *fence,
>>    				    struct dma_fence_cb *cb)
>>    {
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index b2c36f2b2599..88d6129d4a18 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>>    	__u32 pad;
>>    };
>>    
>> +struct drm_syncobj_transfer {
>> +	__u32 binary_handle;
>> +	__u32 timeline_handle;
>> +	__u64 point;
>> +	__u32 flags;
>> +	__u32 pad;
>> +};
>> +
>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>> @@ -933,6 +941,8 @@ extern "C" {
>>    
>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>>    
>>    /**
>>     * Device specific ioctls should only be in their respective headers
>


[-- Attachment #1.2: Type: text/html, Size: 11768 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
  2019-02-18 10:35         ` zhoucm1
@ 2019-02-18 11:01           ` Koenig, Christian
       [not found]             ` <27a38e11-0c77-4340-aac9-b02e816c6f58-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-18 11:01 UTC (permalink / raw)
  To: Zhou, David(ChunMing); +Cc: amd-gfx, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 11695 bytes --]

Hi David,

well I think Lionel is testing the invalid signal order on purpose :)

Anyway, we really need to handle invalid order gracefully here. E.g. either the same way as during CS or we abort and return an error message.

I think just using the same approach as during CS is the best we can do.

Regards,
Christian


Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" <David1.Zhou@amd.com>:

Hi Lionel,

I checked your igt test case,

uint64_t points[5] = { 1, 5, 3, 7, 6 };

which is illegal signal order.

I must admit we should handle it gracefully if signals aren't in order, and it shouldn't lead to a deadlock.

Hi Christian,

Can we just ignore when signal point X <= timeline Y? Or just give a warning?

Otherwise, unexpected use cases like Lionel's can easily lead to deadlock.


-David

On 2019年02月15日 22:28, Lionel Landwerlin wrote:

Hi David,

Thanks a lot for pointing me to the tests you've added in IGT.
While adding a test that signals fences imported into a timeline
syncobj out of order, I ran into a deadlock.
Here is the test :
https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9

Trying to kill the deadlocked process I got this backtrace :


[   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
[   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[syncobj_timelin:2021]
[   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
mei intel_pch_thermal mac_hid acpi_pad parp
ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
ghash_clmulni_intel prime_numbers
drm_kms_helper aesni_intel syscopyarea sysfillrect
[   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
cryptd drm e1000e glue_helper cqhci sdhci wmi video
[   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
U            5.0.0-rc5+ #337
[   60.452882] Hardware name:  /NUC6i7KYB, BIOS
KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
[   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
[   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
[   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
ffffffffffffff13
[   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
ffff8f5690fb2f00
[   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
ffff8f5690fb2180
[   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
ffff8f5690fb2eb0
[   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
ffff8f5690fb2f68
[   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
ffff8f5655a45fc0
[   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
knlGS:0000000000000000
[   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
00000000003606e0
[   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[   60.452916] Call Trace:
[   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
[   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
[   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
[   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
[   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452992]  ? sw_sync_ioctl+0x347/0x370
[   60.452994]  do_vfs_ioctl+0xa4/0x640
[   60.452995]  ? __fput+0x134/0x220
[   60.452997]  ? do_fcntl+0x1a5/0x650
[   60.452998]  ksys_ioctl+0x70/0x80
[   60.452999]  __x64_sys_ioctl+0x16/0x20
[   60.453002]  do_syscall_64+0x55/0x110
[   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   60.453005] RIP: 0033:0x7fdc5b6e45d7
[   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
[   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
0000000000000010
[   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007fdc5b6e45d7
[   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
0000000000000003
[   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
000000000000001e
[   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
0000563d3959e4d0
[   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
0000000000000000
[   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
[syncobj_timelin:2021]


-Lionel


On 07/12/2018 09:55, Chunming Zhou wrote:


we need to import/export timeline point

Signed-off-by: Chunming Zhou <david1.zhou@amd.com><mailto:david1.zhou@amd.com>
---
  drivers/gpu/drm/drm_internal.h |  4 +++
  drivers/gpu/drm/drm_ioctl.c    |  6 ++++
  drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
  include/uapi/drm/drm.h         | 10 ++++++
  4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dab4d5936441..ecbe3d51a702 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
+int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
+int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
  int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_private);
  int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7578ef6dc1d1..6b417e3c3ea5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
+                     drm_syncobj_binary_to_timeline_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
+                     drm_syncobj_timeline_to_binary_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 282982e58dbd..cf4daa670252 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                        &args->handle);
  }

+int
+drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *timeline_syncobj = NULL;
+       struct dma_fence *fence;
+       struct dma_fence_chain *chain;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
+       if (!timeline_syncobj) {
+               return -ENOENT;
+       }
+       ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
+                                    &fence);
+       if (ret)
+               goto err;
+       chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+       if (!chain)
+               goto err1;
+       drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
+err1:
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(timeline_syncobj);
+
+       return ret;
+}
+
+int
+drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *binary_syncobj = NULL;
+       struct dma_fence *fence;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
+       if (!binary_syncobj)
+               return -ENOENT;
+       ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
+                                    args->point, args->flags, &fence);
+       if (ret)
+               goto err;
+       drm_syncobj_replace_fence(binary_syncobj, fence);
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(binary_syncobj);
+
+       return ret;
+}
+
  static void syncobj_wait_fence_func(struct dma_fence *fence,
                                    struct dma_fence_cb *cb)
  {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b2c36f2b2599..88d6129d4a18 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,14 @@ struct drm_syncobj_handle {
        __u32 pad;
  };

+struct drm_syncobj_transfer {
+       __u32 binary_handle;
+       __u32 timeline_handle;
+       __u64 point;
+       __u32 flags;
+       __u32 pad;
+};
+
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
@@ -933,6 +941,8 @@ extern "C" {

  #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT       DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
  #define DRM_IOCTL_SYNCOBJ_QUERY               DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE   DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY   DRM_IOWR(0xCD, struct drm_syncobj_transfer)

  /**
   * Device specific ioctls should only be in their respective headers







[-- Attachment #1.2: Type: text/html, Size: 13784 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4
       [not found]                     ` <4becddef-3bb3-5a66-34d4-95cced896939-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-18 11:40                       ` Lionel Landwerlin
  0 siblings, 0 replies; 42+ messages in thread
From: Lionel Landwerlin @ 2019-02-18 11:40 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing),
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Dave Airlie, Daniel Rakos, Jason Ekstrand

On 18/02/2019 07:28, Koenig, Christian wrote:
> Am 18.02.19 um 04:10 schrieb zhoucm1:
>>
>> On 2019年02月17日 03:22, Christian König wrote:
>>> Am 15.02.19 um 20:31 schrieb Lionel Landwerlin via amd-gfx:
>>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>>> user mode can query timeline payload.
>>>>> v2: check return value of copy_to_user
>>>>> v3: handle querying entry by entry
>>>>> v4: rebase on new chain container, simplify interface
>>>>>
>>>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
>>>>> Cc: Daniel Rakos <Daniel.Rakos@amd.com>
>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
>>>>> Cc: Dave Airlie <airlied@redhat.com>
>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> ---
>>>>>    drivers/gpu/drm/drm_internal.h |  2 ++
>>>>>    drivers/gpu/drm/drm_ioctl.c    |  2 ++
>>>>>    drivers/gpu/drm/drm_syncobj.c  | 43
>>>>> ++++++++++++++++++++++++++++++++++
>>>>>    include/uapi/drm/drm.h         | 10 ++++++++
>>>>>    4 files changed, 57 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>> index 18b41e10195c..dab4d5936441 100644
>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>> @@ -184,6 +184,8 @@ int drm_syncobj_reset_ioctl(struct drm_device
>>>>> *dev, void *data,
>>>>>                    struct drm_file *file_private);
>>>>>    int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>>>>>                     struct drm_file *file_private);
>>>>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>> +                struct drm_file *file_private);
>>>>>      /* drm_framebuffer.c */
>>>>>    void drm_framebuffer_print_info(struct drm_printer *p, unsigned
>>>>> int indent,
>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>>> index a9a17ed35cc4..7578ef6dc1d1 100644
>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>> @@ -681,6 +681,8 @@ static const struct drm_ioctl_desc drm_ioctls[]
>>>>> = {
>>>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL,
>>>>> drm_syncobj_signal_ioctl,
>>>>>                  DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>>> +              DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
>>>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>> drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
>>>>>        DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>> drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>> index 348079bb0965..f97fa00ca1d0 100644
>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>> @@ -1061,3 +1061,46 @@ drm_syncobj_signal_ioctl(struct drm_device
>>>>> *dev, void *data,
>>>>>          return ret;
>>>>>    }
>>>>> +
>>>>> +int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>> +                struct drm_file *file_private)
>>>>> +{
>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>> +    struct drm_syncobj **syncobjs;
>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>> +    uint32_t i;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    if (args->pad != 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (args->count_handles == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>> +                     u64_to_user_ptr(args->handles),
>>>>> +                     args->count_handles,
>>>>> +                     &syncobjs);
>>>>> +    if (ret < 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>> +        struct dma_fence_chain *chain;
>>>>> +        struct dma_fence *fence;
>>>>> +        uint64_t point;
>>>>> +
>>>>> +        fence = drm_syncobj_fence_get(syncobjs[i]);
>>>>> +        chain = to_dma_fence_chain(fence);
>>>>> +        point = chain ? fence->seqno : 0;
>>>>
>>>> Sorry, I don' t want to sound annoying, but this looks like this
>>>> could report values going backward.
>>> Well please be annoying as much as you can :) But yeah all that stuff
>>> has been discussed before as well.
>>>
>>>> Anything add a point X to a timeline that has reached value Y with X
>>>> < Y would trigger that.
>>> Yes, that can indeed happen.
>> trigger what? when adding x (x < y), then return 0 when query?
>> Why would this happen?
>> No, syncobj->fence should always be there and the last chain node, if
>> it is ever added.
> Well maybe Lionel should clarify a bit what he means?
>
> I thought he is concerned that the call could return values where X < Y,
> but that doesn't seem to be the case.
>
> Christian.


I meant something like this :


t = create_timeline_syncobj()

signal(t, 2)

query(t) => 2

signal(t, 1)

query(t) => 1


-Lionel



>
>> -David
>>> But adding a timeline point X which is before the already added point
>>> Y is illegal in the first place :)
>>>
>>> So when the application does something stupid and breaks it can just
>>> keep the pieces.
>>>
>>> In the kernel we still do the most defensive thing and sync to
>>> everything in this case.
>>>
>>> I'm just not sure if we should print an error into syslog or just
>>> continue silently.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> Either through the submission or userspace signaling or importing
>>>> another syncpoint's fence.
>>>>
>>>>
>>>> -Lionel
>>>>
>>>>
>>>>> +        ret = copy_to_user(&points[i], &point, sizeof(uint64_t));
>>>>> +        ret = ret ? -EFAULT : 0;
>>>>> +        if (ret)
>>>>> +            break;
>>>>> +    }
>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>> index 0092111d002c..b2c36f2b2599 100644
>>>>> --- a/include/uapi/drm/drm.h
>>>>> +++ b/include/uapi/drm/drm.h
>>>>> @@ -767,6 +767,14 @@ struct drm_syncobj_array {
>>>>>        __u32 pad;
>>>>>    };
>>>>>    +struct drm_syncobj_timeline_array {
>>>>> +    __u64 handles;
>>>>> +    __u64 points;
>>>>> +    __u32 count_handles;
>>>>> +    __u32 pad;
>>>>> +};
>>>>> +
>>>>> +
>>>>>    /* Query current scanout sequence number */
>>>>>    struct drm_crtc_get_sequence {
>>>>>        __u32 crtc_id;        /* requested crtc_id */
>>>>> @@ -924,6 +932,8 @@ extern "C" {
>>>>>    #define DRM_IOCTL_MODE_REVOKE_LEASE    DRM_IOWR(0xC9, struct
>>>>> drm_mode_revoke_lease)
>>>>>      #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct
>>>>> drm_syncobj_timeline_wait)
>>>>> +#define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>>>> drm_syncobj_timeline_array)
>>>>> +
>>>>>    /**
>>>>>     * Device specific ioctls should only be in their respective headers
>>>>>     * The device specific ioctl range is from 0x40 to 0x9f.
>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]             ` <27a38e11-0c77-4340-aac9-b02e816c6f58-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
@ 2019-02-18 12:07               ` Lionel Landwerlin
       [not found]                 ` <83890a08-769a-b52a-f2f6-9fe425f2562c-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Lionel Landwerlin @ 2019-02-18 12:07 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing)
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 12123 bytes --]

Thanks guys :)

You mentioned that signaling out of order is illegal.
Is this illegal with regard to the vulkan spec or to the syncobj 
implementation?

I'm not finding anything in the vulkan spec that makes out of order 
signaling illegal.
That's why I came up with this test, just verifying that the timeline 
does not go backward in terms of its payload.

-Lionel

On 18/02/2019 11:01, Koenig, Christian wrote:
> Hi David,
>
> well I think Lionel is testing the invalid signal order on purpose :)
>
> Anyway we really need to handle invalid order gracefully here. E.g. 
> either the same way as during CS or we abort and return an error message.
>
> I think just using the same approach as during CS is the best we can do.
>
> Regards,
> Christian
>
>
> Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" <David1.Zhou-5C7GfCeVMHo@public.gmane.org>:
>
> Hi Lionel,
>
> I checked your igt test case,
>
> uint64_t points[5] = { 1, 5, 3, 7, 6 };
>
> which is illegal signal order.
>
> I must admit we should handle it gracefully if signal isn't in-order, 
> and we shouldn't lead to deadlock.
>
> Hi Christian,
>
> Can we just ignore when signal point X <= timeline Y? Or just give a 
> warning?
>
> Otherwise unexpected use cases like Lionel's can easily lead to 
> deadlock.
>
>
> -David
>
>
> On 2019年02月15日 22:28, Lionel Landwerlin wrote:
>> Hi David,
>>
>> Thanks a lot for pointing me to the tests you've added in IGT.
>> While adding a test that signals fences imported into a timeline
>> syncobj out of order, I ran into a deadlock.
>> Here is the test :
>> https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9
>>
>> Trying to kill the deadlocked process I got this backtrace :
>>
>>
>> [   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
>> [   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
>> [syncobj_timelin:2021]
>> [   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
>> nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
>> snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
>> rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
>> snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
>> s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
>> libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
>> btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
>> snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
>> mei intel_pch_thermal mac_hid acpi_pad parp
>> ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
>> zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
>> async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
>> hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
>> ghash_clmulni_intel prime_numbers
>> drm_kms_helper aesni_intel syscopyarea sysfillrect
>> [   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
>> cryptd drm e1000e glue_helper cqhci sdhci wmi video
>> [   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
>> U            5.0.0-rc5+ #337
>> [   60.452882] Hardware name:  /NUC6i7KYB, BIOS
>> KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
>> [   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
>> [   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
>> 74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
>> 00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
>> [   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
>> ffffffffffffff13
>> [   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
>> ffff8f5690fb2f00
>> [   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
>> ffff8f5690fb2180
>> [   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
>> ffff8f5690fb2eb0
>> [   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
>> ffff8f5690fb2f68
>> [   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
>> ffff8f5655a45fc0
>> [   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
>> knlGS:0000000000000000
>> [   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
>> 00000000003606e0
>> [   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>> 0000000000000000
>> [   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
>> 0000000000000400
>> [   60.452916] Call Trace:
>> [   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
>> [   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>> [   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
>> [   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
>> [   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
>> [   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>> [   60.452992]  ? sw_sync_ioctl+0x347/0x370
>> [   60.452994]  do_vfs_ioctl+0xa4/0x640
>> [   60.452995]  ? __fput+0x134/0x220
>> [   60.452997]  ? do_fcntl+0x1a5/0x650
>> [   60.452998]  ksys_ioctl+0x70/0x80
>> [   60.452999]  __x64_sys_ioctl+0x16/0x20
>> [   60.453002]  do_syscall_64+0x55/0x110
>> [   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>> [   60.453005] RIP: 0033:0x7fdc5b6e45d7
>> [   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
>> 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
>> [   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
>> 0000000000000010
>> [   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
>> 00007fdc5b6e45d7
>> [   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
>> 0000000000000003
>> [   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
>> 000000000000001e
>> [   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
>> 0000563d3959e4d0
>> [   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
>> 0000000000000000
>> [   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
>> [syncobj_timelin:2021]
>>
>>
>> -Lionel
>>
>>
>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>> we need to import/export timeline point
>>>
>>> Signed-off-by: Chunming Zhou<david1.zhou-5C7GfCeVMHo@public.gmane.org>
>>> ---
>>>    drivers/gpu/drm/drm_internal.h |  4 +++
>>>    drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>>>    drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>>>    include/uapi/drm/drm.h         | 10 ++++++
>>>    4 files changed, 86 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>> index dab4d5936441..ecbe3d51a702 100644
>>> --- a/drivers/gpu/drm/drm_internal.h
>>> +++ b/drivers/gpu/drm/drm_internal.h
>>> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>>>    				   struct drm_file *file_private);
>>>    int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>    				   struct drm_file *file_private);
>>> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>> +					 struct drm_file *file_private);
>>> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>> +					 struct drm_file *file_private);
>>>    int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>>    			   struct drm_file *file_private);
>>>    int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>> index 7578ef6dc1d1..6b417e3c3ea5 100644
>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
>>> +		      drm_syncobj_binary_to_timeline_ioctl,
>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
>>> +		      drm_syncobj_timeline_to_binary_ioctl,
>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>> index 282982e58dbd..cf4daa670252 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>    					&args->handle);
>>>    }
>>>    
>>> +int
>>> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>> +				     struct drm_file *file_private)
>>> +{
>>> +	struct drm_syncobj_transfer *args = data;
>>> +	struct drm_syncobj *timeline_syncobj = NULL;
>>> +	struct dma_fence *fence;
>>> +	struct dma_fence_chain *chain;
>>> +	int ret;
>>> +
>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>> +		return -ENODEV;
>>> +
>>> +	if (args->pad)
>>> +		return -EINVAL;
>>> +
>>> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
>>> +	if (!timeline_syncobj) {
>>> +		return -ENOENT;
>>> +	}
>>> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
>>> +				     &fence);
>>> +	if (ret)
>>> +		goto err;
>>> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>>> +	if (!chain)
>>> +		goto err1;
>>> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
>>> +err1:
>>> +	dma_fence_put(fence);
>>> +err:
>>> +	drm_syncobj_put(timeline_syncobj);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>> +int
>>> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>> +				     struct drm_file *file_private)
>>> +{
>>> +	struct drm_syncobj_transfer *args = data;
>>> +	struct drm_syncobj *binary_syncobj = NULL;
>>> +	struct dma_fence *fence;
>>> +	int ret;
>>> +
>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>> +		return -ENODEV;
>>> +
>>> +	if (args->pad)
>>> +		return -EINVAL;
>>> +
>>> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
>>> +	if (!binary_syncobj)
>>> +		return -ENOENT;
>>> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
>>> +				     args->point, args->flags, &fence);
>>> +	if (ret)
>>> +		goto err;
>>> +	drm_syncobj_replace_fence(binary_syncobj, fence);
>>> +	dma_fence_put(fence);
>>> +err:
>>> +	drm_syncobj_put(binary_syncobj);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>    static void syncobj_wait_fence_func(struct dma_fence *fence,
>>>    				    struct dma_fence_cb *cb)
>>>    {
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index b2c36f2b2599..88d6129d4a18 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>>>    	__u32 pad;
>>>    };
>>>    
>>> +struct drm_syncobj_transfer {
>>> +	__u32 binary_handle;
>>> +	__u32 timeline_handle;
>>> +	__u64 point;
>>> +	__u32 flags;
>>> +	__u32 pad;
>>> +};
>>> +
>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>>> @@ -933,6 +941,8 @@ extern "C" {
>>>    
>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>>> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>>>    
>>>    /**
>>>     * Device specific ioctls should only be in their respective headers
>


[-- Attachment #1.2: Type: text/html, Size: 14200 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]                 ` <83890a08-769a-b52a-f2f6-9fe425f2562c-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
@ 2019-02-18 17:01                   ` Koenig, Christian
       [not found]                     ` <64c548d0-b062-f937-30a5-5a4d3f296f91-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-18 17:01 UTC (permalink / raw)
  To: Lionel Landwerlin, Zhou, David(ChunMing)
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 12375 bytes --]

Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
Thanks guys :)

You mentioned that signaling out of order is illegal.
Is this illegal with regard to the vulkan spec or to the syncobj implementation?

David is the expert on that, but as far as I know that is forbidden by the vulkan spec.

I'm not finding anything in the vulkan spec that makes out of order signaling illegal.
That's why I came up with this test, just verifying that the timeline does not go backward in terms of its payload.

Well we need to handle this case gracefully in the kernel, so it is still a good testcase.

Christian.


-Lionel

On 18/02/2019 11:01, Koenig, Christian wrote:
Hi David,

well I think Lionel is testing the invalid signal order on purpose :)

Anyway we really need to handle invalid order gracefully here. E.g. either the same way as during CS or we abort and return an error message.

I think just using the same approach as during CS is the best we can do.

Regards,
Christian


Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" <David1.Zhou@amd.com><mailto:David1.Zhou@amd.com>:

Hi Lionel,

I checked your igt test case,

uint64_t points[5] = { 1, 5, 3, 7, 6 };

which is illegal signal order.

I must admit we should handle it gracefully if signal isn't in-order, and we shouldn't lead to deadlock.

Hi Christian,

Can we just ignore when signal point X <= timeline Y? Or just give a warning?

Otherwise unexpected use cases like Lionel's can easily lead to deadlock.


-David

On 2019年02月15日 22:28, Lionel Landwerlin wrote:

Hi David,

Thanks a lot for pointing me to the tests you've added in IGT.
While adding a test that signals fences imported into a timeline
syncobj out of order, I ran into a deadlock.
Here is the test :
https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9

Trying to kill the deadlocked process I got this backtrace :


[   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
[   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[syncobj_timelin:2021]
[   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
mei intel_pch_thermal mac_hid acpi_pad parp
ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
ghash_clmulni_intel prime_numbers
drm_kms_helper aesni_intel syscopyarea sysfillrect
[   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
cryptd drm e1000e glue_helper cqhci sdhci wmi video
[   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
U            5.0.0-rc5+ #337
[   60.452882] Hardware name:  /NUC6i7KYB, BIOS
KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
[   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
[   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
[   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
ffffffffffffff13
[   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
ffff8f5690fb2f00
[   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
ffff8f5690fb2180
[   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
ffff8f5690fb2eb0
[   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
ffff8f5690fb2f68
[   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
ffff8f5655a45fc0
[   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
knlGS:0000000000000000
[   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
00000000003606e0
[   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[   60.452916] Call Trace:
[   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
[   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
[   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
[   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
[   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452992]  ? sw_sync_ioctl+0x347/0x370
[   60.452994]  do_vfs_ioctl+0xa4/0x640
[   60.452995]  ? __fput+0x134/0x220
[   60.452997]  ? do_fcntl+0x1a5/0x650
[   60.452998]  ksys_ioctl+0x70/0x80
[   60.452999]  __x64_sys_ioctl+0x16/0x20
[   60.453002]  do_syscall_64+0x55/0x110
[   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   60.453005] RIP: 0033:0x7fdc5b6e45d7
[   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
[   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
0000000000000010
[   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007fdc5b6e45d7
[   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
0000000000000003
[   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
000000000000001e
[   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
0000563d3959e4d0
[   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
0000000000000000
[   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
[syncobj_timelin:2021]


-Lionel


On 07/12/2018 09:55, Chunming Zhou wrote:


we need to import/export timeline point

Signed-off-by: Chunming Zhou <david1.zhou@amd.com><mailto:david1.zhou@amd.com>
---
  drivers/gpu/drm/drm_internal.h |  4 +++
  drivers/gpu/drm/drm_ioctl.c    |  6 ++++
  drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
  include/uapi/drm/drm.h         | 10 ++++++
  4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dab4d5936441..ecbe3d51a702 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
+int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
+int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
  int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_private);
  int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7578ef6dc1d1..6b417e3c3ea5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
+                     drm_syncobj_binary_to_timeline_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
+                     drm_syncobj_timeline_to_binary_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 282982e58dbd..cf4daa670252 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                        &args->handle);
  }

+int
+drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *timeline_syncobj = NULL;
+       struct dma_fence *fence;
+       struct dma_fence_chain *chain;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
+       if (!timeline_syncobj) {
+               return -ENOENT;
+       }
+       ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
+                                    &fence);
+       if (ret)
+               goto err;
+       chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+       if (!chain)
+               goto err1;
+       drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
+err1:
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(timeline_syncobj);
+
+       return ret;
+}
+
+int
+drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *binary_syncobj = NULL;
+       struct dma_fence *fence;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
+       if (!binary_syncobj)
+               return -ENOENT;
+       ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
+                                    args->point, args->flags, &fence);
+       if (ret)
+               goto err;
+       drm_syncobj_replace_fence(binary_syncobj, fence);
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(binary_syncobj);
+
+       return ret;
+}
+
  static void syncobj_wait_fence_func(struct dma_fence *fence,
                                    struct dma_fence_cb *cb)
  {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b2c36f2b2599..88d6129d4a18 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,14 @@ struct drm_syncobj_handle {
        __u32 pad;
  };

+struct drm_syncobj_transfer {
+       __u32 binary_handle;
+       __u32 timeline_handle;
+       __u64 point;
+       __u32 flags;
+       __u32 pad;
+};
+
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
@@ -933,6 +941,8 @@ extern "C" {

  #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT       DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
  #define DRM_IOCTL_SYNCOBJ_QUERY               DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE   DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY   DRM_IOWR(0xCD, struct drm_syncobj_transfer)

  /**
   * Device specific ioctls should only be in their respective headers





[-- Attachment #1.2: Type: text/html, Size: 15460 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]                     ` <64c548d0-b062-f937-30a5-5a4d3f296f91-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-19 10:46                       ` zhoucm1
       [not found]                         ` <c2c12849-d26b-3212-40ca-682d6f8006fa-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: zhoucm1 @ 2019-02-19 10:46 UTC (permalink / raw)
  To: Koenig, Christian, Lionel Landwerlin, Zhou, David(ChunMing)
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 13392 bytes --]

Hi Lionel,

the attached patches should fix your problem and also handle the messed-up signal order.

Hi Christian,

Could you have a look if it's reasonable?


btw: I pushed the change to 
https://github.com/amingriyue/timeline-syncobj-kernel, which is already 
rebased onto the latest drm-misc (kernel 5.0). You can use that branch directly.


-David


On 2019年02月19日 01:01, Koenig, Christian wrote:
> Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
>> Thanks guys :)
>>
>> You mentioned that signaling out of order is illegal.
>> Is this illegal with regard to the vulkan spec or to the syncobj 
>> implementation?
>
> David is the expert on that, but as far as I know that is forbidden by 
> the vulkan spec.
>
>> I'm not finding anything in the vulkan spec that makes out of order 
>> signaling illegal.
>> That's why I came up with this test, just verifying that the timeline 
>> does not go backward in term of its payload.
>
> Well we need to handle this case gracefully in the kernel, so it is 
> still a good testcase.
>
> Christian.
>
>>
>> -Lionel
>>
>> On 18/02/2019 11:01, Koenig, Christian wrote:
>>> Hi David,
>>>
>>> well I think Lionel is testing the invalid signal order on purpose :)
>>>
>>> Anyway we really need to handle invalid order graceful here. E.g. 
>>> either the same way as during CS or we abort and return an error 
>>> message.
>>>
>>> I think just using the same approach as during CS ist the best we 
>>> can do.
>>>
>>> Regards,
>>> Christian
>>>
>>>
>>> Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" 
>>> <David1.Zhou-5C7GfCeVMHo@public.gmane.org>:
>>>
>>> Hi Lionel,
>>>
>>> I checked your igt test case,
>>>
>>> uint64_t points[5] = { 1, 5, 3, 7, 6 };
>>>
>>> which is illegal signal order.
>>>
>>> I must admit we should handle it gracefully if signal isn't 
>>> in-order, and we shouldn't lead to deadlock.
>>>
>>> Hi Christian,
>>>
>>> Can we just ignore when signal point X <= timeline Y? Or just give a 
>>> warning?
>>>
>>> Otherwise like Lionel's unexpected use cases, which easily leads to 
>>> deadlock.
>>>
>>>
>>> -David
>>>
>>>
>>> On 2019年02月15日 22:28, Lionel Landwerlin wrote:
>>>> Hi David,
>>>>
>>>> Thanks a lot for point me to the tests you've added in IGT.
>>>> While adding a test with that signals fences imported into a timeline
>>>> syncobj out of order, I ran into a deadlock.
>>>> Here is the test :
>>>> https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9
>>>>
>>>> Trying to kill the deadlocked process I got this backtrace :
>>>>
>>>>
>>>> [   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
>>>> [   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
>>>> [syncobj_timelin:2021]
>>>> [   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
>>>> nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
>>>> snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
>>>> rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
>>>> snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
>>>> s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
>>>> libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
>>>> btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
>>>> snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
>>>> mei intel_pch_thermal mac_hid acpi_pad parp
>>>> ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
>>>> zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
>>>> async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
>>>> hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
>>>> ghash_clmulni_intel prime_numbers
>>>> drm_kms_helper aesni_intel syscopyarea sysfillrect
>>>> [   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
>>>> cryptd drm e1000e glue_helper cqhci sdhci wmi video
>>>> [   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
>>>> U            5.0.0-rc5+ #337
>>>> [   60.452882] Hardware name:  /NUC6i7KYB, BIOS
>>>> KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
>>>> [   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
>>>> [   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
>>>> 74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
>>>> 00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
>>>> [   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
>>>> ffffffffffffff13
>>>> [   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
>>>> ffff8f5690fb2f00
>>>> [   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
>>>> ffff8f5690fb2180
>>>> [   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
>>>> ffff8f5690fb2eb0
>>>> [   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
>>>> ffff8f5690fb2f68
>>>> [   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
>>>> ffff8f5655a45fc0
>>>> [   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
>>>> knlGS:0000000000000000
>>>> [   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
>>>> 00000000003606e0
>>>> [   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>>>> 0000000000000000
>>>> [   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
>>>> 0000000000000400
>>>> [   60.452916] Call Trace:
>>>> [   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
>>>> [   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>> [   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
>>>> [   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
>>>> [   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
>>>> [   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>> [   60.452992]  ? sw_sync_ioctl+0x347/0x370
>>>> [   60.452994]  do_vfs_ioctl+0xa4/0x640
>>>> [   60.452995]  ? __fput+0x134/0x220
>>>> [   60.452997]  ? do_fcntl+0x1a5/0x650
>>>> [   60.452998]  ksys_ioctl+0x70/0x80
>>>> [   60.452999]  __x64_sys_ioctl+0x16/0x20
>>>> [   60.453002]  do_syscall_64+0x55/0x110
>>>> [   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>>> [   60.453005] RIP: 0033:0x7fdc5b6e45d7
>>>> [   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
>>>> 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
>>>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
>>>> [   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
>>>> 0000000000000010
>>>> [   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
>>>> 00007fdc5b6e45d7
>>>> [   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
>>>> 0000000000000003
>>>> [   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
>>>> 000000000000001e
>>>> [   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
>>>> 0000563d3959e4d0
>>>> [   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
>>>> 0000000000000000
>>>> [   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
>>>> [syncobj_timelin:2021]
>>>>
>>>>
>>>> -Lionel
>>>>
>>>>
>>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>>> we need to import/export timeline point
>>>>>
>>>>> Signed-off-by: Chunming Zhou<david1.zhou-5C7GfCeVMHo@public.gmane.org>
>>>>> ---
>>>>>    drivers/gpu/drm/drm_internal.h |  4 +++
>>>>>    drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>>>>>    drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>>>>>    include/uapi/drm/drm.h         | 10 ++++++
>>>>>    4 files changed, 86 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>>> index dab4d5936441..ecbe3d51a702 100644
>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>>>>>    				   struct drm_file *file_private);
>>>>>    int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>    				   struct drm_file *file_private);
>>>>> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>> +					 struct drm_file *file_private);
>>>>> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>> +					 struct drm_file *file_private);
>>>>>    int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>>>>    			   struct drm_file *file_private);
>>>>>    int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>>> index 7578ef6dc1d1..6b417e3c3ea5 100644
>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
>>>>> +		      drm_syncobj_binary_to_timeline_ioctl,
>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
>>>>> +		      drm_syncobj_timeline_to_binary_ioctl,
>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>>>> index 282982e58dbd..cf4daa670252 100644
>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>    					&args->handle);
>>>>>    }
>>>>>    
>>>>> +int
>>>>> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>> +				     struct drm_file *file_private)
>>>>> +{
>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>> +	struct drm_syncobj *timeline_syncobj = NULL;
>>>>> +	struct dma_fence *fence;
>>>>> +	struct dma_fence_chain *chain;
>>>>> +	int ret;
>>>>> +
>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>> +		return -ENODEV;
>>>>> +
>>>>> +	if (args->pad)
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
>>>>> +	if (!timeline_syncobj) {
>>>>> +		return -ENOENT;
>>>>> +	}
>>>>> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
>>>>> +				     &fence);
>>>>> +	if (ret)
>>>>> +		goto err;
>>>>> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>>>>> +	if (!chain)
>>>>> +		goto err1;
>>>>> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
>>>>> +err1:
>>>>> +	dma_fence_put(fence);
>>>>> +err:
>>>>> +	drm_syncobj_put(timeline_syncobj);
>>>>> +
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +int
>>>>> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>> +				     struct drm_file *file_private)
>>>>> +{
>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>> +	struct drm_syncobj *binary_syncobj = NULL;
>>>>> +	struct dma_fence *fence;
>>>>> +	int ret;
>>>>> +
>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>> +		return -ENODEV;
>>>>> +
>>>>> +	if (args->pad)
>>>>> +		return -EINVAL;
>>>>> +
>>>>> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
>>>>> +	if (!binary_syncobj)
>>>>> +		return -ENOENT;
>>>>> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
>>>>> +				     args->point, args->flags, &fence);
>>>>> +	if (ret)
>>>>> +		goto err;
>>>>> +	drm_syncobj_replace_fence(binary_syncobj, fence);
>>>>> +	dma_fence_put(fence);
>>>>> +err:
>>>>> +	drm_syncobj_put(binary_syncobj);
>>>>> +
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>>    static void syncobj_wait_fence_func(struct dma_fence *fence,
>>>>>    				    struct dma_fence_cb *cb)
>>>>>    {
>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>> index b2c36f2b2599..88d6129d4a18 100644
>>>>> --- a/include/uapi/drm/drm.h
>>>>> +++ b/include/uapi/drm/drm.h
>>>>> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>>>>>    	__u32 pad;
>>>>>    };
>>>>>    
>>>>> +struct drm_syncobj_transfer {
>>>>> +	__u32 binary_handle;
>>>>> +	__u32 timeline_handle;
>>>>> +	__u64 point;
>>>>> +	__u32 flags;
>>>>> +	__u32 pad;
>>>>> +};
>>>>> +
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>>>>> @@ -933,6 +941,8 @@ extern "C" {
>>>>>    
>>>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>>>>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>>>>> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>>>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>>>>>    
>>>>>    /**
>>>>>     * Device specific ioctls should only be in their respective headers
>>>
>>
>


[-- Attachment #1.2: Type: text/html, Size: 16232 bytes --]

[-- Attachment #2: 0001-fence-chian-fix-iterate-chain-node.patch --]
[-- Type: text/x-patch, Size: 988 bytes --]

>From 303419427d645e872fd7082c1b094d6eb1d487fc Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Tue, 19 Feb 2019 17:29:31 +0800
Subject: [PATCH 1/2] fence-chain: fix iterate chain node

Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 include/linux/dma-fence-chain.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index a5c2e8c6915c..09b038d3f5ef 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -69,7 +69,7 @@ to_dma_fence_chain(struct dma_fence *fence)
  */
 #define dma_fence_chain_for_each(iter, head)	\
 	for (iter = dma_fence_get(head); iter; \
-	     iter = dma_fence_chain_walk(head))
+	     iter = dma_fence_chain_walk(iter))
 
 struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence);
 int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno);
-- 
2.17.1


[-- Attachment #3: 0002-syncobj-don-t-allow-messed-order-signal-point-for-ti.patch --]
[-- Type: text/x-patch, Size: 1371 bytes --]

>From d2dbe497d51304bcdf3b1883ba6ed199c5147e2c Mon Sep 17 00:00:00 2001
From: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
Date: Tue, 19 Feb 2019 17:30:53 +0800
Subject: [PATCH 2/2] syncobj: don't allow messed order signal point for
 timeline

Signed-off-by: Chunming Zhou <david1.zhou-5C7GfCeVMHo@public.gmane.org>
---
 drivers/gpu/drm/drm_syncobj.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 961f6d343564..7da20c287c41 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -145,12 +145,25 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj,
 {
 	struct syncobj_wait_entry *cur, *tmp;
 	struct dma_fence *prev;
+	struct dma_fence_chain *last_node;
+	uint64_t last_point;
 
 	dma_fence_get(fence);
 
 	spin_lock(&syncobj->lock);
 
 	prev = drm_syncobj_fence_get(syncobj);
+	last_node = to_dma_fence_chain(prev);
+	last_point = last_node ? prev->seqno : 0;
+	if (point <= last_point) {
+		/* timeline doesn't allow messed order signal points. */
+		DRM_ERROR("signal point %llu <= last point %llu!\n",
+			  point, last_point);
+		kfree(chain);
+		spin_unlock(&syncobj->lock);
+		dma_fence_put(prev);
+		return;
+	}
 	dma_fence_chain_init(chain, prev, fence, point);
 	rcu_assign_pointer(syncobj->fence, &chain->base);
 
-- 
2.17.1


[-- Attachment #4: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]                         ` <c2c12849-d26b-3212-40ca-682d6f8006fa-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-19 11:29                           ` Lionel Landwerlin
  2019-02-19 11:32                           ` Koenig, Christian
  1 sibling, 0 replies; 42+ messages in thread
From: Lionel Landwerlin @ 2019-02-19 11:29 UTC (permalink / raw)
  To: zhoucm1, Koenig, Christian, Zhou, David(ChunMing)
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 13824 bytes --]

Thanks David,

Will give this a go!

-Lionel

On 19/02/2019 10:46, zhoucm1 wrote:
>
> Hi Lionel,
>
> the attached should fix your problem and also messed signal order.
>
> Hi Christian,
>
> Could you have a look if it's reasonable?
>
>
> btw: I pushed to change to 
> https://github.com/amingriyue/timeline-syncobj-kernel, which is 
> already rebased to latest drm-misc(kernel 5.0). You can directly use 
> that branch.
>
>
> -David
>
>
> On 2019年02月19日 01:01, Koenig, Christian wrote:
>> Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
>>> Thanks guys :)
>>>
>>> You mentioned that signaling out of order is illegal.
>>> Is this illegal with regard to the vulkan spec or to the syncobj 
>>> implementation?
>>
>> David is the expert on that, but as far as I know that is forbidden 
>> by the vulkan spec.
>>
>>> I'm not finding anything in the vulkan spec that makes out of order 
>>> signaling illegal.
>>> That's why I came up with this test, just verifying that the 
>>> timeline does not go backward in term of its payload.
>>
>> Well we need to handle this case gracefully in the kernel, so it is 
>> still a good testcase.
>>
>> Christian.
>>
>>>
>>> -Lionel
>>>
>>> On 18/02/2019 11:01, Koenig, Christian wrote:
>>>> Hi David,
>>>>
>>>> well I think Lionel is testing the invalid signal order on purpose :)
>>>>
>>>> Anyway we really need to handle invalid order graceful here. E.g. 
>>>> either the same way as during CS or we abort and return an error 
>>>> message.
>>>>
>>>> I think just using the same approach as during CS ist the best we 
>>>> can do.
>>>>
>>>> Regards,
>>>> Christian
>>>>
>>>>
>>>> Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" 
>>>> <David1.Zhou-5C7GfCeVMHo@public.gmane.org>:
>>>>
>>>> Hi Lionel,
>>>>
>>>> I checked your igt test case,
>>>>
>>>> uint64_t points[5] = { 1, 5, 3, 7, 6 };
>>>>
>>>> which is illegal signal order.
>>>>
>>>> I must admit we should handle it gracefully if signal isn't 
>>>> in-order, and we shouldn't lead to deadlock.
>>>>
>>>> Hi Christian,
>>>>
>>>> Can we just ignore when signal point X <= timeline Y? Or just give 
>>>> a warning?
>>>>
>>>> Otherwise like Lionel's unexpected use cases, which easily leads to 
>>>> deadlock.
>>>>
>>>>
>>>> -David
>>>>
>>>>
>>>> On 2019年02月15日 22:28, Lionel Landwerlin wrote:
>>>>> Hi David,
>>>>>
>>>>> Thanks a lot for point me to the tests you've added in IGT.
>>>>> While adding a test with that signals fences imported into a timeline
>>>>> syncobj out of order, I ran into a deadlock.
>>>>> Here is the test :
>>>>> https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9
>>>>>
>>>>> Trying to kill the deadlocked process I got this backtrace :
>>>>>
>>>>>
>>>>> [   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
>>>>> [   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
>>>>> [syncobj_timelin:2021]
>>>>> [   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
>>>>> nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
>>>>> snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
>>>>> rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
>>>>> snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
>>>>> s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
>>>>> libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
>>>>> btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
>>>>> snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
>>>>> mei intel_pch_thermal mac_hid acpi_pad parp
>>>>> ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
>>>>> zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
>>>>> async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
>>>>> hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
>>>>> ghash_clmulni_intel prime_numbers
>>>>> drm_kms_helper aesni_intel syscopyarea sysfillrect
>>>>> [   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
>>>>> cryptd drm e1000e glue_helper cqhci sdhci wmi video
>>>>> [   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
>>>>> U            5.0.0-rc5+ #337
>>>>> [   60.452882] Hardware name:  /NUC6i7KYB, BIOS
>>>>> KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
>>>>> [   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
>>>>> [   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
>>>>> 74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
>>>>> 00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
>>>>> [   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
>>>>> ffffffffffffff13
>>>>> [   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
>>>>> ffff8f5690fb2f00
>>>>> [   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
>>>>> ffff8f5690fb2180
>>>>> [   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
>>>>> ffff8f5690fb2eb0
>>>>> [   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
>>>>> ffff8f5690fb2f68
>>>>> [   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
>>>>> ffff8f5655a45fc0
>>>>> [   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
>>>>> knlGS:0000000000000000
>>>>> [   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>>> [   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
>>>>> 00000000003606e0
>>>>> [   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>>>>> 0000000000000000
>>>>> [   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
>>>>> 0000000000000400
>>>>> [   60.452916] Call Trace:
>>>>> [   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
>>>>> [   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>>> [   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
>>>>> [   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
>>>>> [   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
>>>>> [   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>>> [   60.452992]  ? sw_sync_ioctl+0x347/0x370
>>>>> [   60.452994]  do_vfs_ioctl+0xa4/0x640
>>>>> [   60.452995]  ? __fput+0x134/0x220
>>>>> [   60.452997]  ? do_fcntl+0x1a5/0x650
>>>>> [   60.452998]  ksys_ioctl+0x70/0x80
>>>>> [   60.452999]  __x64_sys_ioctl+0x16/0x20
>>>>> [   60.453002]  do_syscall_64+0x55/0x110
>>>>> [   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>>>> [   60.453005] RIP: 0033:0x7fdc5b6e45d7
>>>>> [   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
>>>>> 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
>>>>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
>>>>> [   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
>>>>> 0000000000000010
>>>>> [   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
>>>>> 00007fdc5b6e45d7
>>>>> [   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
>>>>> 0000000000000003
>>>>> [   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
>>>>> 000000000000001e
>>>>> [   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
>>>>> 0000563d3959e4d0
>>>>> [   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
>>>>> 0000000000000000
>>>>> [   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
>>>>> [syncobj_timelin:2021]
>>>>>
>>>>>
>>>>> -Lionel
>>>>>
>>>>>
>>>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>>>> we need to import/export timeline point
>>>>>>
>>>>>> Signed-off-by: Chunming Zhou<david1.zhou-5C7GfCeVMHo@public.gmane.org>
>>>>>> ---
>>>>>>    drivers/gpu/drm/drm_internal.h |  4 +++
>>>>>>    drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>>>>>>    drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>>>>>>    include/uapi/drm/drm.h         | 10 ++++++
>>>>>>    4 files changed, 86 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>>>> index dab4d5936441..ecbe3d51a702 100644
>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>>>>>>    				   struct drm_file *file_private);
>>>>>>    int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>>    				   struct drm_file *file_private);
>>>>>> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>>> +					 struct drm_file *file_private);
>>>>>> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>>> +					 struct drm_file *file_private);
>>>>>>    int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>>>>>    			   struct drm_file *file_private);
>>>>>>    int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>>>> index 7578ef6dc1d1..6b417e3c3ea5 100644
>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
>>>>>> +		      drm_syncobj_binary_to_timeline_ioctl,
>>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
>>>>>> +		      drm_syncobj_timeline_to_binary_ioctl,
>>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>>>>> index 282982e58dbd..cf4daa670252 100644
>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>>    					&args->handle);
>>>>>>    }
>>>>>>    
>>>>>> +int
>>>>>> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>>> +				     struct drm_file *file_private)
>>>>>> +{
>>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>>> +	struct drm_syncobj *timeline_syncobj = NULL;
>>>>>> +	struct dma_fence *fence;
>>>>>> +	struct dma_fence_chain *chain;
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>>> +		return -ENODEV;
>>>>>> +
>>>>>> +	if (args->pad)
>>>>>> +		return -EINVAL;
>>>>>> +
>>>>>> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
>>>>>> +	if (!timeline_syncobj) {
>>>>>> +		return -ENOENT;
>>>>>> +	}
>>>>>> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
>>>>>> +				     &fence);
>>>>>> +	if (ret)
>>>>>> +		goto err;
>>>>>> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>>>>>> +	if (!chain)
>>>>>> +		goto err1;
>>>>>> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
>>>>>> +err1:
>>>>>> +	dma_fence_put(fence);
>>>>>> +err:
>>>>>> +	drm_syncobj_put(timeline_syncobj);
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>> +int
>>>>>> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>>> +				     struct drm_file *file_private)
>>>>>> +{
>>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>>> +	struct drm_syncobj *binary_syncobj = NULL;
>>>>>> +	struct dma_fence *fence;
>>>>>> +	int ret;
>>>>>> +
>>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>>> +		return -ENODEV;
>>>>>> +
>>>>>> +	if (args->pad)
>>>>>> +		return -EINVAL;
>>>>>> +
>>>>>> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
>>>>>> +	if (!binary_syncobj)
>>>>>> +		return -ENOENT;
>>>>>> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
>>>>>> +				     args->point, args->flags, &fence);
>>>>>> +	if (ret)
>>>>>> +		goto err;
>>>>>> +	drm_syncobj_replace_fence(binary_syncobj, fence);
>>>>>> +	dma_fence_put(fence);
>>>>>> +err:
>>>>>> +	drm_syncobj_put(binary_syncobj);
>>>>>> +
>>>>>> +	return ret;
>>>>>> +}
>>>>>> +
>>>>>>    static void syncobj_wait_fence_func(struct dma_fence *fence,
>>>>>>    				    struct dma_fence_cb *cb)
>>>>>>    {
>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>> index b2c36f2b2599..88d6129d4a18 100644
>>>>>> --- a/include/uapi/drm/drm.h
>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>>>>>>    	__u32 pad;
>>>>>>    };
>>>>>>    
>>>>>> +struct drm_syncobj_transfer {
>>>>>> +	__u32 binary_handle;
>>>>>> +	__u32 timeline_handle;
>>>>>> +	__u64 point;
>>>>>> +	__u32 flags;
>>>>>> +	__u32 pad;
>>>>>> +};
>>>>>> +
>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>>>>>> @@ -933,6 +941,8 @@ extern "C" {
>>>>>>    
>>>>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>>>>>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>>>>>> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>>>>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>>>>>>    
>>>>>>    /**
>>>>>>     * Device specific ioctls should only be in their respective headers
>>>>
>>>
>>
>


[-- Attachment #1.2: Type: text/html, Size: 17122 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]                         ` <c2c12849-d26b-3212-40ca-682d6f8006fa-5C7GfCeVMHo@public.gmane.org>
  2019-02-19 11:29                           ` Lionel Landwerlin
@ 2019-02-19 11:32                           ` Koenig, Christian
  2019-02-20  4:53                             ` zhoucm1
  1 sibling, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-19 11:32 UTC (permalink / raw)
  To: Zhou, David(ChunMing), Lionel Landwerlin
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 13185 bytes --]

Hi David,

Could you have a look if it's reasonable?

Patch #1 is also something I already fixed on my local branch.

But patch #2 won't work like this.

We can't return an error from drm_syncobj_add_point() because we already submitted work to the hardware. And just dropping the fence like you do in the patch is clearly a no-go as well.

Regards,
Christian.

Am 19.02.19 um 11:46 schrieb zhoucm1:

Hi Lionel,

the attached should fix your problem and also the messed-up signal order.

Hi Christian,

Could you have a look if it's reasonable?


btw: I pushed the change to https://github.com/amingriyue/timeline-syncobj-kernel, which is already rebased onto the latest drm-misc (kernel 5.0). You can directly use that branch.


-David

On 2019年02月19日 01:01, Koenig, Christian wrote:
Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
Thanks guys :)

You mentioned that signaling out of order is illegal.
Is this illegal with regard to the vulkan spec or to the syncobj implementation?

David is the expert on that, but as far as I know that is forbidden by the vulkan spec.

I'm not finding anything in the vulkan spec that makes out of order signaling illegal.
That's why I came up with this test, just verifying that the timeline does not go backward in terms of its payload.

Well we need to handle this case gracefully in the kernel, so it is still a good testcase.

Christian.


-Lionel

On 18/02/2019 11:01, Koenig, Christian wrote:
Hi David,

well I think Lionel is testing the invalid signal order on purpose :)

Anyway we really need to handle invalid order gracefully here. E.g. either the same way as during CS or we abort and return an error message.

I think just using the same approach as during CS is the best we can do.

Regards,
Christian


Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" <David1.Zhou@amd.com><mailto:David1.Zhou@amd.com>:

Hi Lionel,

I checked your igt test case,

uint64_t points[5] = { 1, 5, 3, 7, 6 };

which is illegal signal order.

I must admit we should handle it gracefully if the signaling isn't in order, and it shouldn't lead to a deadlock.

Hi Christian,

Can we just ignore when signal point X <= timeline Y? Or just give a warning?

Otherwise unexpected use cases like Lionel's can easily lead to a deadlock.


-David

On 2019年02月15日 22:28, Lionel Landwerlin wrote:

Hi David,

Thanks a lot for pointing me to the tests you've added in IGT.
While adding a test that signals fences imported into a timeline
syncobj out of order, I ran into a deadlock.
Here is the test :
https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9

Trying to kill the deadlocked process I got this backtrace :


[   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
[   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[syncobj_timelin:2021]
[   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
mei intel_pch_thermal mac_hid acpi_pad parp
ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
ghash_clmulni_intel prime_numbers
drm_kms_helper aesni_intel syscopyarea sysfillrect
[   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
cryptd drm e1000e glue_helper cqhci sdhci wmi video
[   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
U            5.0.0-rc5+ #337
[   60.452882] Hardware name:  /NUC6i7KYB, BIOS
KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
[   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
[   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
[   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
ffffffffffffff13
[   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
ffff8f5690fb2f00
[   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
ffff8f5690fb2180
[   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
ffff8f5690fb2eb0
[   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
ffff8f5690fb2f68
[   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
ffff8f5655a45fc0
[   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
knlGS:0000000000000000
[   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
00000000003606e0
[   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[   60.452916] Call Trace:
[   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
[   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
[   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
[   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
[   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452992]  ? sw_sync_ioctl+0x347/0x370
[   60.452994]  do_vfs_ioctl+0xa4/0x640
[   60.452995]  ? __fput+0x134/0x220
[   60.452997]  ? do_fcntl+0x1a5/0x650
[   60.452998]  ksys_ioctl+0x70/0x80
[   60.452999]  __x64_sys_ioctl+0x16/0x20
[   60.453002]  do_syscall_64+0x55/0x110
[   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   60.453005] RIP: 0033:0x7fdc5b6e45d7
[   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
[   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
0000000000000010
[   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007fdc5b6e45d7
[   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
0000000000000003
[   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
000000000000001e
[   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
0000563d3959e4d0
[   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
0000000000000000
[   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
[syncobj_timelin:2021]


-Lionel


On 07/12/2018 09:55, Chunming Zhou wrote:


we need to import/export timeline point

Signed-off-by: Chunming Zhou <david1.zhou@amd.com><mailto:david1.zhou@amd.com>
---
  drivers/gpu/drm/drm_internal.h |  4 +++
  drivers/gpu/drm/drm_ioctl.c    |  6 ++++
  drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
  include/uapi/drm/drm.h         | 10 ++++++
  4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dab4d5936441..ecbe3d51a702 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
+int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
+int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
  int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_private);
  int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7578ef6dc1d1..6b417e3c3ea5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
+                     drm_syncobj_binary_to_timeline_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
+                     drm_syncobj_timeline_to_binary_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 282982e58dbd..cf4daa670252 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                        &args->handle);
  }

+int
+drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *timeline_syncobj = NULL;
+       struct dma_fence *fence;
+       struct dma_fence_chain *chain;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
+       if (!timeline_syncobj) {
+               return -ENOENT;
+       }
+       ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
+                                    &fence);
+       if (ret)
+               goto err;
+       chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+       if (!chain)
+               goto err1;
+       drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
+err1:
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(timeline_syncobj);
+
+       return ret;
+}
+
+int
+drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *binary_syncobj = NULL;
+       struct dma_fence *fence;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
+       if (!binary_syncobj)
+               return -ENOENT;
+       ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
+                                    args->point, args->flags, &fence);
+       if (ret)
+               goto err;
+       drm_syncobj_replace_fence(binary_syncobj, fence);
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(binary_syncobj);
+
+       return ret;
+}
+
  static void syncobj_wait_fence_func(struct dma_fence *fence,
                                    struct dma_fence_cb *cb)
  {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b2c36f2b2599..88d6129d4a18 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,14 @@ struct drm_syncobj_handle {
        __u32 pad;
  };

+struct drm_syncobj_transfer {
+       __u32 binary_handle;
+       __u32 timeline_handle;
+       __u64 point;
+       __u32 flags;
+       __u32 pad;
+};
+
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
@@ -933,6 +941,8 @@ extern "C" {

  #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT       DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
  #define DRM_IOCTL_SYNCOBJ_QUERY               DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE   DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY   DRM_IOWR(0xCD, struct drm_syncobj_transfer)

  /**
   * Device specific ioctls should only be in their respective headers







[-- Attachment #1.2: Type: text/html, Size: 16797 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
  2019-02-19 11:32                           ` Koenig, Christian
@ 2019-02-20  4:53                             ` zhoucm1
  2019-02-20  7:59                               ` Koenig, Christian
  0 siblings, 1 reply; 42+ messages in thread
From: zhoucm1 @ 2019-02-20  4:53 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing), Lionel Landwerlin
  Cc: amd-gfx, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 14597 bytes --]



On 2019年02月19日 19:32, Koenig, Christian wrote:
> Hi David,
>
>> Could you have a look if it's reasonable?
>
> Patch #1 is also something I already fixed on my local branch.
>
> But patch #2 won't work like this.
>
> We can't return an error from drm_syncobj_add_point() because we 
> already submitted work to the hardware. And just dropping the fence 
> like you do in the patch is a clearly no-go as well.

Then do you have any idea how to skip the out-of-order signal point?

-David
>
> Regards,
> Christian.
>
> Am 19.02.19 um 11:46 schrieb zhoucm1:
>>
>> Hi Lionel,
>>
>> the attached should fix your problem and also messed signal order.
>>
>> Hi Christian,
>>
>> Could you have a look if it's reasonable?
>>
>>
>> btw: I pushed to change to 
>> https://github.com/amingriyue/timeline-syncobj-kernel, which is 
>> already rebased to latest drm-misc(kernel 5.0). You can directly use 
>> that branch.
>>
>>
>> -David
>>
>>
>> On 2019年02月19日 01:01, Koenig, Christian wrote:
>>> Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
>>>> Thanks guys :)
>>>>
>>>> You mentioned that signaling out of order is illegal.
>>>> Is this illegal with regard to the vulkan spec or to the syncobj 
>>>> implementation?
>>>
>>> David is the expert on that, but as far as I know that is forbidden 
>>> by the vulkan spec.
>>>
>>>> I'm not finding anything in the vulkan spec that makes out of order 
>>>> signaling illegal.
>>>> That's why I came up with this test, just verifying that the 
>>>> timeline does not go backward in term of its payload.
>>>
>>> Well we need to handle this case gracefully in the kernel, so it is 
>>> still a good testcase.
>>>
>>> Christian.
>>>
>>>>
>>>> -Lionel
>>>>
>>>> On 18/02/2019 11:01, Koenig, Christian wrote:
>>>>> Hi David,
>>>>>
>>>>> well I think Lionel is testing the invalid signal order on purpose :)
>>>>>
>>>>> Anyway we really need to handle invalid order graceful here. E.g. 
>>>>> either the same way as during CS or we abort and return an error 
>>>>> message.
>>>>>
>>>>> I think just using the same approach as during CS ist the best we 
>>>>> can do.
>>>>>
>>>>> Regards,
>>>>> Christian
>>>>>
>>>>>
>>>>> Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" 
>>>>> <David1.Zhou@amd.com>:
>>>>>
>>>>> Hi Lionel,
>>>>>
>>>>> I checked your igt test case,
>>>>>
>>>>> uint64_t points[5] = { 1, 5, 3, 7, 6 };
>>>>>
>>>>> which is illegal signal order.
>>>>>
>>>>> I must admit we should handle it gracefully if signal isn't 
>>>>> in-order, and we shouldn't lead to deadlock.
>>>>>
>>>>> Hi Christian,
>>>>>
>>>>> Can we just ignore when signal point X <= timeline Y? Or just give 
>>>>> a warning?
>>>>>
>>>>> Otherwise like Lionel's unexpected use cases, which easily leads 
>>>>> to deadlock.
>>>>>
>>>>>
>>>>> -David
>>>>>
>>>>>
>>>>> On 2019年02月15日 22:28, Lionel Landwerlin wrote:
>>>>>> Hi David,
>>>>>>
>>>>>> Thanks a lot for point me to the tests you've added in IGT.
>>>>>> While adding a test with that signals fences imported into a timeline
>>>>>> syncobj out of order, I ran into a deadlock.
>>>>>> Here is the test :
>>>>>> https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9
>>>>>>
>>>>>> Trying to kill the deadlocked process I got this backtrace :
>>>>>>
>>>>>>
>>>>>> [   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
>>>>>> [   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
>>>>>> [syncobj_timelin:2021]
>>>>>> [   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
>>>>>> nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
>>>>>> snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
>>>>>> rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
>>>>>> snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
>>>>>> s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
>>>>>> libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
>>>>>> btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
>>>>>> snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
>>>>>> mei intel_pch_thermal mac_hid acpi_pad parp
>>>>>> ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
>>>>>> zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
>>>>>> async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
>>>>>> hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
>>>>>> ghash_clmulni_intel prime_numbers
>>>>>> drm_kms_helper aesni_intel syscopyarea sysfillrect
>>>>>> [   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
>>>>>> cryptd drm e1000e glue_helper cqhci sdhci wmi video
>>>>>> [   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
>>>>>> U            5.0.0-rc5+ #337
>>>>>> [   60.452882] Hardware name:  /NUC6i7KYB, BIOS
>>>>>> KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
>>>>>> [   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
>>>>>> [   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
>>>>>> 74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
>>>>>> 00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
>>>>>> [   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
>>>>>> ffffffffffffff13
>>>>>> [   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
>>>>>> ffff8f5690fb2f00
>>>>>> [   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
>>>>>> ffff8f5690fb2180
>>>>>> [   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
>>>>>> ffff8f5690fb2eb0
>>>>>> [   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
>>>>>> ffff8f5690fb2f68
>>>>>> [   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
>>>>>> ffff8f5655a45fc0
>>>>>> [   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
>>>>>> knlGS:0000000000000000
>>>>>> [   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>>>> [   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
>>>>>> 00000000003606e0
>>>>>> [   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>>>>>> 0000000000000000
>>>>>> [   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
>>>>>> 0000000000000400
>>>>>> [   60.452916] Call Trace:
>>>>>> [   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
>>>>>> [   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>>>> [   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
>>>>>> [   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
>>>>>> [   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
>>>>>> [   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>>>> [   60.452992]  ? sw_sync_ioctl+0x347/0x370
>>>>>> [   60.452994]  do_vfs_ioctl+0xa4/0x640
>>>>>> [   60.452995]  ? __fput+0x134/0x220
>>>>>> [   60.452997]  ? do_fcntl+0x1a5/0x650
>>>>>> [   60.452998]  ksys_ioctl+0x70/0x80
>>>>>> [   60.452999]  __x64_sys_ioctl+0x16/0x20
>>>>>> [   60.453002]  do_syscall_64+0x55/0x110
>>>>>> [   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>>>>> [   60.453005] RIP: 0033:0x7fdc5b6e45d7
>>>>>> [   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
>>>>>> 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
>>>>>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
>>>>>> [   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
>>>>>> 0000000000000010
>>>>>> [   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
>>>>>> 00007fdc5b6e45d7
>>>>>> [   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
>>>>>> 0000000000000003
>>>>>> [   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
>>>>>> 000000000000001e
>>>>>> [   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
>>>>>> 0000563d3959e4d0
>>>>>> [   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
>>>>>> 0000000000000000
>>>>>> [   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
>>>>>> [syncobj_timelin:2021]
>>>>>>
>>>>>>
>>>>>> -Lionel
>>>>>>
>>>>>>
>>>>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>>>>> we need to import/export timeline point
>>>>>>>
>>>>>>> Signed-off-by: Chunming Zhou<david1.zhou@amd.com>
>>>>>>> ---
>>>>>>>    drivers/gpu/drm/drm_internal.h |  4 +++
>>>>>>>    drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>>>>>>>    drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>>>>>>>    include/uapi/drm/drm.h         | 10 ++++++
>>>>>>>    4 files changed, 86 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>>>>> index dab4d5936441..ecbe3d51a702 100644
>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>>> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>>>>>>>    				   struct drm_file *file_private);
>>>>>>>    int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>>>    				   struct drm_file *file_private);
>>>>>>> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>>>> +					 struct drm_file *file_private);
>>>>>>> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>>>> +					 struct drm_file *file_private);
>>>>>>>    int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>>>>>>    			   struct drm_file *file_private);
>>>>>>>    int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>>>>> index 7578ef6dc1d1..6b417e3c3ea5 100644
>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>>> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
>>>>>>> +		      drm_syncobj_binary_to_timeline_ioctl,
>>>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
>>>>>>> +		      drm_syncobj_timeline_to_binary_ioctl,
>>>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>>>>>> index 282982e58dbd..cf4daa670252 100644
>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>>> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>>>    					&args->handle);
>>>>>>>    }
>>>>>>>    
>>>>>>> +int
>>>>>>> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>>>> +				     struct drm_file *file_private)
>>>>>>> +{
>>>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>>>> +	struct drm_syncobj *timeline_syncobj = NULL;
>>>>>>> +	struct dma_fence *fence;
>>>>>>> +	struct dma_fence_chain *chain;
>>>>>>> +	int ret;
>>>>>>> +
>>>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>>>> +		return -ENODEV;
>>>>>>> +
>>>>>>> +	if (args->pad)
>>>>>>> +		return -EINVAL;
>>>>>>> +
>>>>>>> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
>>>>>>> +	if (!timeline_syncobj) {
>>>>>>> +		return -ENOENT;
>>>>>>> +	}
>>>>>>> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
>>>>>>> +				     &fence);
>>>>>>> +	if (ret)
>>>>>>> +		goto err;
>>>>>>> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>>>>>>> +	if (!chain)
>>>>>>> +		goto err1;
>>>>>>> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
>>>>>>> +err1:
>>>>>>> +	dma_fence_put(fence);
>>>>>>> +err:
>>>>>>> +	drm_syncobj_put(timeline_syncobj);
>>>>>>> +
>>>>>>> +	return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int
>>>>>>> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>>>> +				     struct drm_file *file_private)
>>>>>>> +{
>>>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>>>> +	struct drm_syncobj *binary_syncobj = NULL;
>>>>>>> +	struct dma_fence *fence;
>>>>>>> +	int ret;
>>>>>>> +
>>>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>>>> +		return -ENODEV;
>>>>>>> +
>>>>>>> +	if (args->pad)
>>>>>>> +		return -EINVAL;
>>>>>>> +
>>>>>>> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
>>>>>>> +	if (!binary_syncobj)
>>>>>>> +		return -ENOENT;
>>>>>>> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
>>>>>>> +				     args->point, args->flags, &fence);
>>>>>>> +	if (ret)
>>>>>>> +		goto err;
>>>>>>> +	drm_syncobj_replace_fence(binary_syncobj, fence);
>>>>>>> +	dma_fence_put(fence);
>>>>>>> +err:
>>>>>>> +	drm_syncobj_put(binary_syncobj);
>>>>>>> +
>>>>>>> +	return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>>    static void syncobj_wait_fence_func(struct dma_fence *fence,
>>>>>>>    				    struct dma_fence_cb *cb)
>>>>>>>    {
>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>>> index b2c36f2b2599..88d6129d4a18 100644
>>>>>>> --- a/include/uapi/drm/drm.h
>>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>>> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>>>>>>>    	__u32 pad;
>>>>>>>    };
>>>>>>>    
>>>>>>> +struct drm_syncobj_transfer {
>>>>>>> +	__u32 binary_handle;
>>>>>>> +	__u32 timeline_handle;
>>>>>>> +	__u64 point;
>>>>>>> +	__u32 flags;
>>>>>>> +	__u32 pad;
>>>>>>> +};
>>>>>>> +
>>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>>>>>>> @@ -933,6 +941,8 @@ extern "C" {
>>>>>>>    
>>>>>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>>>>>>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>>>>>>> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>>>>>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>>>>>>>    
>>>>>>>    /**
>>>>>>>     * Device specific ioctls should only be in their respective headers
>>>>>
>>>>
>>>
>>
>


[-- Attachment #1.2: Type: text/html, Size: 18019 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
  2019-02-20  4:53                             ` zhoucm1
@ 2019-02-20  7:59                               ` Koenig, Christian
       [not found]                                 ` <976d7032-1cde-0427-ce56-38c2ac8881ec-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: Koenig, Christian @ 2019-02-20  7:59 UTC (permalink / raw)
  To: Zhou, David(ChunMing); +Cc: amd-gfx, dri-devel


[-- Attachment #1.1: Type: text/plain, Size: 13644 bytes --]

Am 20.02.19 um 05:53 schrieb zhoucm1:


On 2019年02月19日 19:32, Koenig, Christian wrote:
Hi David,

Could you have a look if it's reasonable?

Patch #1 is also something I already fixed on my local branch.

But patch #2 won't work like this.

We can't return an error from drm_syncobj_add_point() because we already submitted work to the hardware. And just dropping the fence like you do in the patch is clearly a no-go as well.

Then do you have any idea how to skip the out-of-order signal point?

No, I don't think we can actually do this.

The only solution I can see would be to lock down the syncobj to modifications while command submission is in progress. And that in turn would mean a huge bunch of ww_mutex overhead we will certainly want to avoid.

Christian.


-David

Regards,
Christian.

Am 19.02.19 um 11:46 schrieb zhoucm1:

Hi Lionel,

The attached should fix your problem and also the messed-up signal order.

Hi Christian,

Could you have a look if it's reasonable?


btw: I pushed the change to https://github.com/amingriyue/timeline-syncobj-kernel, which is already rebased onto the latest drm-misc (kernel 5.0). You can use that branch directly.


-David

On 2019年02月19日 01:01, Koenig, Christian wrote:
Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
Thanks guys :)

You mentioned that signaling out of order is illegal.
Is this illegal with regard to the vulkan spec or to the syncobj implementation?

David is the expert on that, but as far as I know that is forbidden by the vulkan spec.

I'm not finding anything in the vulkan spec that makes out of order signaling illegal.
That's why I came up with this test, just verifying that the timeline does not go backward in terms of its payload.

Well we need to handle this case gracefully in the kernel, so it is still a good testcase.

Christian.


-Lionel

On 18/02/2019 11:01, Koenig, Christian wrote:
Hi David,

well I think Lionel is testing the invalid signal order on purpose :)

Anyway, we really need to handle invalid order gracefully here, e.g. either the same way as during CS, or we abort and return an error message.

I think just using the same approach as during CS is the best we can do.

Regards,
Christian


Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" <David1.Zhou@amd.com><mailto:David1.Zhou@amd.com>:

Hi Lionel,

I checked your igt test case,

uint64_t points[5] = { 1, 5, 3, 7, 6 };

which is illegal signal order.

I must admit we should handle it gracefully if signaling isn't in order, and it shouldn't lead to a deadlock.

Hi Christian,

Can we just ignore when signal point X <= timeline Y? Or just give a warning?

Otherwise, unexpected use cases like Lionel's can easily lead to a deadlock.


-David

On 2019年02月15日 22:28, Lionel Landwerlin wrote:

Hi David,

Thanks a lot for pointing me to the tests you've added in IGT.
While adding a test that signals fences imported into a timeline
syncobj out of order, I ran into a deadlock.
Here is the test :
https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9

Trying to kill the deadlocked process I got this backtrace :


[   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
[   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[syncobj_timelin:2021]
[   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
mei intel_pch_thermal mac_hid acpi_pad parp
ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
ghash_clmulni_intel prime_numbers
drm_kms_helper aesni_intel syscopyarea sysfillrect
[   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
cryptd drm e1000e glue_helper cqhci sdhci wmi video
[   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
U            5.0.0-rc5+ #337
[   60.452882] Hardware name:  /NUC6i7KYB, BIOS
KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
[   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
[   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
[   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
ffffffffffffff13
[   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
ffff8f5690fb2f00
[   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
ffff8f5690fb2180
[   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
ffff8f5690fb2eb0
[   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
ffff8f5690fb2f68
[   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
ffff8f5655a45fc0
[   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
knlGS:0000000000000000
[   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
00000000003606e0
[   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[   60.452916] Call Trace:
[   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
[   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
[   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
[   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
[   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452992]  ? sw_sync_ioctl+0x347/0x370
[   60.452994]  do_vfs_ioctl+0xa4/0x640
[   60.452995]  ? __fput+0x134/0x220
[   60.452997]  ? do_fcntl+0x1a5/0x650
[   60.452998]  ksys_ioctl+0x70/0x80
[   60.452999]  __x64_sys_ioctl+0x16/0x20
[   60.453002]  do_syscall_64+0x55/0x110
[   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   60.453005] RIP: 0033:0x7fdc5b6e45d7
[   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
[   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
0000000000000010
[   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007fdc5b6e45d7
[   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
0000000000000003
[   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
000000000000001e
[   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
0000563d3959e4d0
[   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
0000000000000000
[   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
[syncobj_timelin:2021]


-Lionel


On 07/12/2018 09:55, Chunming Zhou wrote:


we need to import/export timeline point

Signed-off-by: Chunming Zhou <david1.zhou@amd.com><mailto:david1.zhou@amd.com>
---
  drivers/gpu/drm/drm_internal.h |  4 +++
  drivers/gpu/drm/drm_ioctl.c    |  6 ++++
  drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
  include/uapi/drm/drm.h         | 10 ++++++
  4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dab4d5936441..ecbe3d51a702 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
+int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
+int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
  int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_private);
  int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7578ef6dc1d1..6b417e3c3ea5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
+                     drm_syncobj_binary_to_timeline_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
+                     drm_syncobj_timeline_to_binary_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 282982e58dbd..cf4daa670252 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                        &args->handle);
  }

+int
+drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *timeline_syncobj = NULL;
+       struct dma_fence *fence;
+       struct dma_fence_chain *chain;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
+       if (!timeline_syncobj) {
+               return -ENOENT;
+       }
+       ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
+                                    &fence);
+       if (ret)
+               goto err;
+       chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+       if (!chain)
+               goto err1;
+       drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
+err1:
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(timeline_syncobj);
+
+       return ret;
+}
+
+int
+drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *binary_syncobj = NULL;
+       struct dma_fence *fence;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
+       if (!binary_syncobj)
+               return -ENOENT;
+       ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
+                                    args->point, args->flags, &fence);
+       if (ret)
+               goto err;
+       drm_syncobj_replace_fence(binary_syncobj, fence);
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(binary_syncobj);
+
+       return ret;
+}
+
  static void syncobj_wait_fence_func(struct dma_fence *fence,
                                    struct dma_fence_cb *cb)
  {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b2c36f2b2599..88d6129d4a18 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,14 @@ struct drm_syncobj_handle {
        __u32 pad;
  };

+struct drm_syncobj_transfer {
+       __u32 binary_handle;
+       __u32 timeline_handle;
+       __u64 point;
+       __u32 flags;
+       __u32 pad;
+};
+
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
@@ -933,6 +941,8 @@ extern "C" {

  #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT       DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
  #define DRM_IOCTL_SYNCOBJ_QUERY               DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE   DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY   DRM_IOWR(0xCD, struct drm_syncobj_transfer)

  /**
   * Device specific ioctls should only be in their respective headers









[-- Attachment #1.2: Type: text/html, Size: 17830 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]                                 ` <976d7032-1cde-0427-ce56-38c2ac8881ec-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-20  8:10                                   ` zhoucm1
       [not found]                                     ` <730eaa42-d852-e9d8-7756-43fb256a466f-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 42+ messages in thread
From: zhoucm1 @ 2019-02-20  8:10 UTC (permalink / raw)
  To: Koenig, Christian, Zhou, David(ChunMing), Lionel Landwerlin
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 15921 bytes --]



On 2019年02月20日 15:59, Koenig, Christian wrote:
> Am 20.02.19 um 05:53 schrieb zhoucm1:
>>
>>
>>
>> On 2019年02月19日 19:32, Koenig, Christian wrote:
>>> Hi David,
>>>
>>>> Could you have a look if it's reasonable?
>>>
>>> Patch #1 is also something I already fixed on my local branch.
>>>
>>> But patch #2 won't work like this.
>>>
>>> We can't return an error from drm_syncobj_add_point() because we 
>>> already submitted work to the hardware. And just dropping the fence 
>>> like you do in the patch is a clearly no-go as well.
>>
>> Then do you have any idea to skip the messed order signal point?
>
> No, I don't think we can actually do this.
But as Lionel pointed out, user mode shouldn't query a smaller timeline 
payload than it did last time, so we must skip out-of-order signal points!

-David

>
> The only solution I can see would be to lock down the syncobj to 
> modifications while command submission is in progress. And that in 
> turn would mean a huge bunch of ww_mutex overhead we will certainly 
> want to avoid.
>
> Christian.
>
>>
>> -David
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 19.02.19 um 11:46 schrieb zhoucm1:
>>>>
>>>> Hi Lionel,
>>>>
>>>> the attached should fix your problem and also messed signal order.
>>>>
>>>> Hi Christian,
>>>>
>>>> Could you have a look if it's reasonable?
>>>>
>>>>
>>>> btw: I pushed to change to 
>>>> https://github.com/amingriyue/timeline-syncobj-kernel, which is 
>>>> already rebased to latest drm-misc(kernel 5.0). You can directly 
>>>> use that branch.
>>>>
>>>>
>>>> -David
>>>>
>>>>
>>>> On 2019年02月19日 01:01, Koenig, Christian wrote:
>>>>> Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
>>>>>> Thanks guys :)
>>>>>>
>>>>>> You mentioned that signaling out of order is illegal.
>>>>>> Is this illegal with regard to the vulkan spec or to the syncobj 
>>>>>> implementation?
>>>>>
>>>>> David is the expert on that, but as far as I know that is 
>>>>> forbidden by the vulkan spec.
>>>>>
>>>>>> I'm not finding anything in the vulkan spec that makes out of 
>>>>>> order signaling illegal.
>>>>>> That's why I came up with this test, just verifying that the 
>>>>>> timeline does not go backward in term of its payload.
>>>>>
>>>>> Well we need to handle this case gracefully in the kernel, so it 
>>>>> is still a good testcase.
>>>>>
>>>>> Christian.
>>>>>
>>>>>>
>>>>>> -Lionel
>>>>>>
>>>>>> On 18/02/2019 11:01, Koenig, Christian wrote:
>>>>>>> Hi David,
>>>>>>>
>>>>>>> well I think Lionel is testing the invalid signal order on 
>>>>>>> purpose :)
>>>>>>>
>>>>>>> Anyway we really need to handle invalid order graceful here. 
>>>>>>> E.g. either the same way as during CS or we abort and return an 
>>>>>>> error message.
>>>>>>>
>>>>>>> I think just using the same approach as during CS ist the best 
>>>>>>> we can do.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Christian
>>>>>>>
>>>>>>>
>>>>>>> Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" 
>>>>>>> <David1.Zhou-5C7GfCeVMHo@public.gmane.org>:
>>>>>>>
>>>>>>> Hi Lionel,
>>>>>>>
>>>>>>> I checked your igt test case,
>>>>>>>
>>>>>>> uint64_t points[5] = { 1, 5, 3, 7, 6 };
>>>>>>>
>>>>>>> which is illegal signal order.
>>>>>>>
>>>>>>> I must admit we should handle it gracefully if signal isn't 
>>>>>>> in-order, and we shouldn't lead to deadlock.
>>>>>>>
>>>>>>> Hi Christian,
>>>>>>>
>>>>>>> Can we just ignore when signal point X <= timeline Y? Or just 
>>>>>>> give a warning?
>>>>>>>
>>>>>>> Otherwise like Lionel's unexpected use cases, which easily leads 
>>>>>>> to deadlock.
>>>>>>>
>>>>>>>
>>>>>>> -David
>>>>>>>
>>>>>>>
>>>>>>> On 2019年02月15日 22:28, Lionel Landwerlin wrote:
>>>>>>>> Hi David,
>>>>>>>>
>>>>>>>> Thanks a lot for point me to the tests you've added in IGT.
>>>>>>>> While adding a test with that signals fences imported into a timeline
>>>>>>>> syncobj out of order, I ran into a deadlock.
>>>>>>>> Here is the test :
>>>>>>>> https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9
>>>>>>>>
>>>>>>>> Trying to kill the deadlocked process I got this backtrace :
>>>>>>>>
>>>>>>>>
>>>>>>>> [   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
>>>>>>>> [   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
>>>>>>>> [syncobj_timelin:2021]
>>>>>>>> [   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
>>>>>>>> nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
>>>>>>>> snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
>>>>>>>> rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
>>>>>>>> snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
>>>>>>>> s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
>>>>>>>> libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
>>>>>>>> btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
>>>>>>>> snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
>>>>>>>> mei intel_pch_thermal mac_hid acpi_pad parp
>>>>>>>> ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
>>>>>>>> zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
>>>>>>>> async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
>>>>>>>> hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
>>>>>>>> ghash_clmulni_intel prime_numbers
>>>>>>>> drm_kms_helper aesni_intel syscopyarea sysfillrect
>>>>>>>> [   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
>>>>>>>> cryptd drm e1000e glue_helper cqhci sdhci wmi video
>>>>>>>> [   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
>>>>>>>> U            5.0.0-rc5+ #337
>>>>>>>> [   60.452882] Hardware name:  /NUC6i7KYB, BIOS
>>>>>>>> KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
>>>>>>>> [   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
>>>>>>>> [   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
>>>>>>>> 74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
>>>>>>>> 00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
>>>>>>>> [   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
>>>>>>>> ffffffffffffff13
>>>>>>>> [   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
>>>>>>>> ffff8f5690fb2f00
>>>>>>>> [   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
>>>>>>>> ffff8f5690fb2180
>>>>>>>> [   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
>>>>>>>> ffff8f5690fb2eb0
>>>>>>>> [   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
>>>>>>>> ffff8f5690fb2f68
>>>>>>>> [   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
>>>>>>>> ffff8f5655a45fc0
>>>>>>>> [   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
>>>>>>>> knlGS:0000000000000000
>>>>>>>> [   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>>>>>> [   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
>>>>>>>> 00000000003606e0
>>>>>>>> [   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
>>>>>>>> 0000000000000000
>>>>>>>> [   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
>>>>>>>> 0000000000000400
>>>>>>>> [   60.452916] Call Trace:
>>>>>>>> [   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
>>>>>>>> [   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>>>>>> [   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
>>>>>>>> [   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
>>>>>>>> [   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
>>>>>>>> [   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
>>>>>>>> [   60.452992]  ? sw_sync_ioctl+0x347/0x370
>>>>>>>> [   60.452994]  do_vfs_ioctl+0xa4/0x640
>>>>>>>> [   60.452995]  ? __fput+0x134/0x220
>>>>>>>> [   60.452997]  ? do_fcntl+0x1a5/0x650
>>>>>>>> [   60.452998]  ksys_ioctl+0x70/0x80
>>>>>>>> [   60.452999]  __x64_sys_ioctl+0x16/0x20
>>>>>>>> [   60.453002]  do_syscall_64+0x55/0x110
>>>>>>>> [   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>>>>>>> [   60.453005] RIP: 0033:0x7fdc5b6e45d7
>>>>>>>> [   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
>>>>>>>> 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
>>>>>>>> 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
>>>>>>>> [   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
>>>>>>>> 0000000000000010
>>>>>>>> [   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
>>>>>>>> 00007fdc5b6e45d7
>>>>>>>> [   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
>>>>>>>> 0000000000000003
>>>>>>>> [   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
>>>>>>>> 000000000000001e
>>>>>>>> [   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
>>>>>>>> 0000563d3959e4d0
>>>>>>>> [   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
>>>>>>>> 0000000000000000
>>>>>>>> [   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
>>>>>>>> [syncobj_timelin:2021]
>>>>>>>>
>>>>>>>>
>>>>>>>> -Lionel
>>>>>>>>
>>>>>>>>
>>>>>>>> On 07/12/2018 09:55, Chunming Zhou wrote:
>>>>>>>>> we need to import/export timeline point
>>>>>>>>>
>>>>>>>>> Signed-off-by: Chunming Zhou<david1.zhou-5C7GfCeVMHo@public.gmane.org>
>>>>>>>>> ---
>>>>>>>>>    drivers/gpu/drm/drm_internal.h |  4 +++
>>>>>>>>>    drivers/gpu/drm/drm_ioctl.c    |  6 ++++
>>>>>>>>>    drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
>>>>>>>>>    include/uapi/drm/drm.h         | 10 ++++++
>>>>>>>>>    4 files changed, 86 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>>>>>>>>> index dab4d5936441..ecbe3d51a702 100644
>>>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>>>>> @@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
>>>>>>>>>    				   struct drm_file *file_private);
>>>>>>>>>    int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>>>>>    				   struct drm_file *file_private);
>>>>>>>>> +int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>>>>>> +					 struct drm_file *file_private);
>>>>>>>>> +int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>>>>>> +					 struct drm_file *file_private);
>>>>>>>>>    int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
>>>>>>>>>    			   struct drm_file *file_private);
>>>>>>>>>    int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
>>>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>>> index 7578ef6dc1d1..6b417e3c3ea5 100644
>>>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>>> @@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>>>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
>>>>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
>>>>>>>>> +		      drm_syncobj_binary_to_timeline_ioctl,
>>>>>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
>>>>>>>>> +		      drm_syncobj_timeline_to_binary_ioctl,
>>>>>>>>> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
>>>>>>>>>    		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>>>>>>>>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
>>>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>>> index 282982e58dbd..cf4daa670252 100644
>>>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>>> @@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
>>>>>>>>>    					&args->handle);
>>>>>>>>>    }
>>>>>>>>>    
>>>>>>>>> +int
>>>>>>>>> +drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
>>>>>>>>> +				     struct drm_file *file_private)
>>>>>>>>> +{
>>>>>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>>>>>> +	struct drm_syncobj *timeline_syncobj = NULL;
>>>>>>>>> +	struct dma_fence *fence;
>>>>>>>>> +	struct dma_fence_chain *chain;
>>>>>>>>> +	int ret;
>>>>>>>>> +
>>>>>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>>>>>> +		return -ENODEV;
>>>>>>>>> +
>>>>>>>>> +	if (args->pad)
>>>>>>>>> +		return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +	timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
>>>>>>>>> +	if (!timeline_syncobj) {
>>>>>>>>> +		return -ENOENT;
>>>>>>>>> +	}
>>>>>>>>> +	ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
>>>>>>>>> +				     &fence);
>>>>>>>>> +	if (ret)
>>>>>>>>> +		goto err;
>>>>>>>>> +	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>>>>>>>>> +	if (!chain)
>>>>>>>>> +		goto err1;
>>>>>>>>> +	drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
>>>>>>>>> +err1:
>>>>>>>>> +	dma_fence_put(fence);
>>>>>>>>> +err:
>>>>>>>>> +	drm_syncobj_put(timeline_syncobj);
>>>>>>>>> +
>>>>>>>>> +	return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +int
>>>>>>>>> +drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
>>>>>>>>> +				     struct drm_file *file_private)
>>>>>>>>> +{
>>>>>>>>> +	struct drm_syncobj_transfer *args = data;
>>>>>>>>> +	struct drm_syncobj *binary_syncobj = NULL;
>>>>>>>>> +	struct dma_fence *fence;
>>>>>>>>> +	int ret;
>>>>>>>>> +
>>>>>>>>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
>>>>>>>>> +		return -ENODEV;
>>>>>>>>> +
>>>>>>>>> +	if (args->pad)
>>>>>>>>> +		return -EINVAL;
>>>>>>>>> +
>>>>>>>>> +	binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
>>>>>>>>> +	if (!binary_syncobj)
>>>>>>>>> +		return -ENOENT;
>>>>>>>>> +	ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
>>>>>>>>> +				     args->point, args->flags, &fence);
>>>>>>>>> +	if (ret)
>>>>>>>>> +		goto err;
>>>>>>>>> +	drm_syncobj_replace_fence(binary_syncobj, fence);
>>>>>>>>> +	dma_fence_put(fence);
>>>>>>>>> +err:
>>>>>>>>> +	drm_syncobj_put(binary_syncobj);
>>>>>>>>> +
>>>>>>>>> +	return ret;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>    static void syncobj_wait_fence_func(struct dma_fence *fence,
>>>>>>>>>    				    struct dma_fence_cb *cb)
>>>>>>>>>    {
>>>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>>>>> index b2c36f2b2599..88d6129d4a18 100644
>>>>>>>>> --- a/include/uapi/drm/drm.h
>>>>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>>>>> @@ -735,6 +735,14 @@ struct drm_syncobj_handle {
>>>>>>>>>    	__u32 pad;
>>>>>>>>>    };
>>>>>>>>>    
>>>>>>>>> +struct drm_syncobj_transfer {
>>>>>>>>> +	__u32 binary_handle;
>>>>>>>>> +	__u32 timeline_handle;
>>>>>>>>> +	__u64 point;
>>>>>>>>> +	__u32 flags;
>>>>>>>>> +	__u32 pad;
>>>>>>>>> +};
>>>>>>>>> +
>>>>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
>>>>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
>>>>>>>>>    #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
>>>>>>>>> @@ -933,6 +941,8 @@ extern "C" {
>>>>>>>>>    
>>>>>>>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
>>>>>>>>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY	DRM_IOWR(0xCD, struct drm_syncobj_transfer)
>>>>>>>>>    
>>>>>>>>>    /**
>>>>>>>>>     * Device specific ioctls should only be in their respective headers
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>


[-- Attachment #1.2: Type: text/html, Size: 20447 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 42+ messages in thread

* Re: [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline
       [not found]                                     ` <730eaa42-d852-e9d8-7756-43fb256a466f-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-20  8:24                                       ` Koenig, Christian
  0 siblings, 0 replies; 42+ messages in thread
From: Koenig, Christian @ 2019-02-20  8:24 UTC (permalink / raw)
  To: Zhou, David(ChunMing)
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 14253 bytes --]

Am 20.02.19 um 09:10 schrieb zhoucm1:


On 2019年02月20日 15:59, Koenig, Christian wrote:
Am 20.02.19 um 05:53 schrieb zhoucm1:


On 2019年02月19日 19:32, Koenig, Christian wrote:
Hi David,

Could you have a look if it's reasonable?

Patch #1 is also something I already fixed on my local branch.

But patch #2 won't work like this.

We can't return an error from drm_syncobj_add_point() because we already submitted work to the hardware. And just dropping the fence like you do in the patch is clearly a no-go as well.

Then do you have any idea to skip the messed order signal point?

No, I don't think we can actually do this.
But as Lionel pointed out, user mode shouldn't query a smaller timeline payload compared to last time, so we must skip out-of-order signal points!

No we don't.

That userspace queries a smaller timeline payload compared to last time is because userspace messed up signaling order in the first place.

In addition to that, I'm not sure that userspace would query a smaller timeline payload. IIRC our workaround for messed up signaling order handled that case gracefully as well.

Christian.


-David


The only solution I can see would be to lock down the syncobj to modifications while command submission is in progress. And that in turn would mean a huge bunch of ww_mutex overhead we will certainly want to avoid.

Christian.


-David

Regards,
Christian.

Am 19.02.19 um 11:46 schrieb zhoucm1:

Hi Lionel,

the attached patch should fix your problem and also the messed-up signal order.

Hi Christian,

Could you have a look if it's reasonable?


btw: I pushed the change to https://github.com/amingriyue/timeline-syncobj-kernel, which is already rebased onto the latest drm-misc (kernel 5.0). You can use that branch directly.


-David

On 2019年02月19日 01:01, Koenig, Christian wrote:
Am 18.02.19 um 13:07 schrieb Lionel Landwerlin:
Thanks guys :)

You mentioned that signaling out of order is illegal.
Is this illegal with regard to the vulkan spec or to the syncobj implementation?

David is the expert on that, but as far as I know that is forbidden by the vulkan spec.

I'm not finding anything in the vulkan spec that makes out of order signaling illegal.
That's why I came up with this test, just verifying that the timeline does not go backward in terms of its payload.

Well we need to handle this case gracefully in the kernel, so it is still a good testcase.

Christian.


-Lionel

On 18/02/2019 11:01, Koenig, Christian wrote:
Hi David,

well I think Lionel is testing the invalid signal order on purpose :)

Anyway, we really need to handle invalid order gracefully here. E.g. either the same way as during CS or we abort and return an error message.

I think just using the same approach as during CS is the best we can do.

Regards,
Christian


Am 18.02.2019 11:35 schrieb "Zhou, David(ChunMing)" <David1.Zhou@amd.com><mailto:David1.Zhou@amd.com>:

Hi Lionel,

I checked your igt test case,

uint64_t points[5] = { 1, 5, 3, 7, 6 };

which is illegal signal order.

I must admit we should handle it gracefully if signal isn't in-order, and we shouldn't lead to deadlock.

Hi Christian,

Can we just ignore when signal point X <= timeline Y? Or just give a warning?

Otherwise, unexpected use cases like Lionel's can easily lead to a deadlock.


-David

On 2019年02月15日 22:28, Lionel Landwerlin wrote:

Hi David,

Thanks a lot for pointing me to the tests you've added in IGT.
While adding a test that signals fences imported into a timeline
syncobj out of order, I ran into a deadlock.
Here is the test :
https://github.com/djdeath/intel-gpu-tools/commit/1e46cf7e7bff09b78a24367ddc2314f97eb0a1b9

Trying to kill the deadlocked process I got this backtrace :


[   33.969136] [IGT] syncobj_timeline: starting subtest signal-order
[   60.452823] watchdog: BUG: soft lockup - CPU#6 stuck for 23s!
[syncobj_timelin:2021]
[   60.452826] Modules linked in: rfcomm cmac bnep binfmt_misc
nls_iso8859_1 snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_codec_generic ledtrig_audio sch_fq_codel ib_iser snd_hda_intel
rdma_cm iw_cm snd_hda_codec ib_cm snd_hda_core snd_hwdep intel_rapl
snd_pcm ib_core x86_pkg_temp_thermal intel_powerclamp configf
s coretemp iscsi_tcp snd_seq_midi libiscsi_tcp snd_seq_midi_event
libiscsi kvm_intel scsi_transport_iscsi kvm btusb snd_rawmidi irqbypass
btrtl intel_cstate intel_rapl_perf btbcm btintel bluetooth snd_seq
snd_seq_device snd_timer input_leds ecdh_generic snd soundcore mei_me
mei intel_pch_thermal mac_hid acpi_pad parp
ort_pc ppdev lp parport ip_tables x_tables autofs4 btrfs zstd_decompress
zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq
async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear
hid_generic usbhid hid i915 crct10dif_pclmul crc32_pclmul i2c_algo_bit
ghash_clmulni_intel prime_numbers
drm_kms_helper aesni_intel syscopyarea sysfillrect
[   60.452876]  sysimgblt fb_sys_fops aes_x86_64 crypto_simd sdhci_pci
cryptd drm e1000e glue_helper cqhci sdhci wmi video
[   60.452881] CPU: 6 PID: 2021 Comm: syncobj_timelin Tainted: G
U            5.0.0-rc5+ #337
[   60.452882] Hardware name:  /NUC6i7KYB, BIOS
KYSKLi70.86A.0042.2016.0929.1933 09/29/2016
[   60.452886] RIP: 0010:dma_fence_chain_walk+0x22c/0x260
[   60.452888] Code: ff e9 93 fe ff ff 48 8b 45 08 48 8b 40 18 48 85 c0
74 0c 48 89 ef e8 33 0f 58 00 84 c0 75 23 f0 41 ff 4d 00 0f 88 99 87 2f
00 <0f> 85 05 fe ff ff 4c 89 ef e8 56 ea ff ff 48 89 d8 5b 5d 41 5c 41
[   60.452888] RSP: 0018:ffff9a5804653ca8 EFLAGS: 00010296 ORIG_RAX:
ffffffffffffff13
[   60.452889] RAX: 0000000000000000 RBX: ffff8f5690fb2480 RCX:
ffff8f5690fb2f00
[   60.452890] RDX: 00000000003e3730 RSI: 0000000000000000 RDI:
ffff8f5690fb2180
[   60.452891] RBP: ffff8f5690fb2180 R08: 0000000000000000 R09:
ffff8f5690fb2eb0
[   60.452891] R10: 0000000000000000 R11: ffff8f5660469860 R12:
ffff8f5690fb2f68
[   60.452892] R13: ffff8f5690fb2f00 R14: 0000000000000003 R15:
ffff8f5655a45fc0
[   60.452913] FS:  00007fdc5c459980(0000) GS:ffff8f569eb80000(0000)
knlGS:0000000000000000
[   60.452913] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   60.452914] CR2: 00007f9d74336dd8 CR3: 000000084a67e004 CR4:
00000000003606e0
[   60.452915] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[   60.452915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[   60.452916] Call Trace:
[   60.452958]  drm_syncobj_add_point+0x102/0x160 [drm]
[   60.452965]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452971]  drm_syncobj_transfer_ioctl+0x10f/0x180 [drm]
[   60.452978]  drm_ioctl_kernel+0xac/0xf0 [drm]
[   60.452984]  drm_ioctl+0x2eb/0x3b0 [drm]
[   60.452990]  ? drm_syncobj_fd_to_handle_ioctl+0x1b0/0x1b0 [drm]
[   60.452992]  ? sw_sync_ioctl+0x347/0x370
[   60.452994]  do_vfs_ioctl+0xa4/0x640
[   60.452995]  ? __fput+0x134/0x220
[   60.452997]  ? do_fcntl+0x1a5/0x650
[   60.452998]  ksys_ioctl+0x70/0x80
[   60.452999]  __x64_sys_ioctl+0x16/0x20
[   60.453002]  do_syscall_64+0x55/0x110
[   60.453004]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   60.453005] RIP: 0033:0x7fdc5b6e45d7
[   60.453006] Code: b3 66 90 48 8b 05 b1 48 2d 00 64 c7 00 26 00 00 00
48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f
05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 48 2d 00 f7 d8 64 89 01 48
[   60.453007] RSP: 002b:00007fff25c4d198 EFLAGS: 00000206 ORIG_RAX:
0000000000000010
[   60.453008] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
00007fdc5b6e45d7
[   60.453008] RDX: 00007fff25c4d200 RSI: 00000000c02064cc RDI:
0000000000000003
[   60.453009] RBP: 00007fff25c4d1d0 R08: 0000000000000000 R09:
000000000000001e
[   60.453010] R10: 0000000000000000 R11: 0000000000000206 R12:
0000563d3959e4d0
[   60.453010] R13: 00007fff25c4d620 R14: 0000000000000000 R15:
0000000000000000
[   88.447359] watchdog: BUG: soft lockup - CPU#6 stuck for 22s!
[syncobj_timelin:2021]


-Lionel


On 07/12/2018 09:55, Chunming Zhou wrote:


we need to import/export timeline point

Signed-off-by: Chunming Zhou <david1.zhou@amd.com><mailto:david1.zhou@amd.com>
---
  drivers/gpu/drm/drm_internal.h |  4 +++
  drivers/gpu/drm/drm_ioctl.c    |  6 ++++
  drivers/gpu/drm/drm_syncobj.c  | 66 ++++++++++++++++++++++++++++++++++
  include/uapi/drm/drm.h         | 10 ++++++
  4 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dab4d5936441..ecbe3d51a702 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -176,6 +176,10 @@ int drm_syncobj_handle_to_fd_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
  int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                   struct drm_file *file_private);
+int drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
+int drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                        struct drm_file *file_private);
  int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_private);
  int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7578ef6dc1d1..6b417e3c3ea5 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -673,6 +673,12 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, drm_syncobj_fd_to_handle_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE,
+                     drm_syncobj_binary_to_timeline_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
+       DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY,
+                     drm_syncobj_timeline_to_binary_ioctl,
+                     DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl,
                      DRM_UNLOCKED|DRM_RENDER_ALLOW),
        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl,
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 282982e58dbd..cf4daa670252 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -670,6 +670,72 @@ drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data,
                                        &args->handle);
  }

+int
+drm_syncobj_binary_to_timeline_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *timeline_syncobj = NULL;
+       struct dma_fence *fence;
+       struct dma_fence_chain *chain;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       timeline_syncobj = drm_syncobj_find(file_private, args->timeline_handle);
+       if (!timeline_syncobj) {
+               return -ENOENT;
+       }
+       ret = drm_syncobj_find_fence(file_private, args->binary_handle, 0, 0,
+                                    &fence);
+       if (ret)
+               goto err;
+       chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+       if (!chain)
+               goto err1;
+       drm_syncobj_add_point(timeline_syncobj, chain, fence, args->point);
+err1:
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(timeline_syncobj);
+
+       return ret;
+}
+
+int
+drm_syncobj_timeline_to_binary_ioctl(struct drm_device *dev, void *data,
+                                    struct drm_file *file_private)
+{
+       struct drm_syncobj_transfer *args = data;
+       struct drm_syncobj *binary_syncobj = NULL;
+       struct dma_fence *fence;
+       int ret;
+
+       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+               return -ENODEV;
+
+       if (args->pad)
+               return -EINVAL;
+
+       binary_syncobj = drm_syncobj_find(file_private, args->binary_handle);
+       if (!binary_syncobj)
+               return -ENOENT;
+       ret = drm_syncobj_find_fence(file_private, args->timeline_handle,
+                                    args->point, args->flags, &fence);
+       if (ret)
+               goto err;
+       drm_syncobj_replace_fence(binary_syncobj, fence);
+       dma_fence_put(fence);
+err:
+       drm_syncobj_put(binary_syncobj);
+
+       return ret;
+}
+
  static void syncobj_wait_fence_func(struct dma_fence *fence,
                                    struct dma_fence_cb *cb)
  {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index b2c36f2b2599..88d6129d4a18 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -735,6 +735,14 @@ struct drm_syncobj_handle {
        __u32 pad;
  };

+struct drm_syncobj_transfer {
+       __u32 binary_handle;
+       __u32 timeline_handle;
+       __u64 point;
+       __u32 flags;
+       __u32 pad;
+};
+
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
  #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2)
@@ -933,6 +941,8 @@ extern "C" {

  #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT       DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
  #define DRM_IOCTL_SYNCOBJ_QUERY               DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY_TO_TIMELINE   DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_TO_BINARY   DRM_IOWR(0xCD, struct drm_syncobj_transfer)

  /**
   * Device specific ioctls should only be in their respective headers











[-- Attachment #1.2: Type: text/html, Size: 18982 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 42+ messages in thread

* [PATCH 11/11] drm/amdgpu: update version for timeline syncobj support in amdgpu
  2018-11-28 14:50 restart syncobj timeline changes v2 Christian König
@ 2018-11-28 14:50 ` Christian König
  0 siblings, 0 replies; 42+ messages in thread
From: Christian König @ 2018-11-28 14:50 UTC (permalink / raw)
  To: dri-devel, amd-gfx

From: Chunming Zhou <david1.zhou@amd.com>

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 90f474f98b6e..316bfc1a6a75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -71,9 +71,10 @@
  * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
  * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
  * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - Add syncobj timeline support to AMDGPU_CS.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
-- 
2.14.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 42+ messages in thread

end of thread, other threads:[~2019-02-20  8:24 UTC | newest]

Thread overview: 42+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-07  9:55 [PATCH 01/11] dma-buf: make fence sequence numbers 64 bit v2 Chunming Zhou
2018-12-07  9:55 ` [PATCH 02/11] dma-buf: add new dma_fence_chain container v4 Chunming Zhou
2019-02-15 14:23   ` Lionel Landwerlin via dri-devel
     [not found]     ` <6c2adaf5-6871-20be-a26d-182f8ca8ab8a-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2019-02-15 14:32       ` Koenig, Christian
     [not found]         ` <e170ceed-fdb7-8b4a-93d7-e565641390b3-5C7GfCeVMHo@public.gmane.org>
2019-02-15 15:52           ` Lionel Landwerlin via amd-gfx
     [not found]             ` <bbae2023-8dee-692e-9549-40779a202587-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2019-02-15 16:39               ` Christian König via amd-gfx
2019-02-15 16:49             ` Jason Ekstrand
     [not found]               ` <CAOFGe96HUkzHPJKYT-07X3vMvCRD-=Hba1=Ke24qt_PY2vn0YQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-02-15 17:51                 ` Christian König via amd-gfx
     [not found]                   ` <a0b27d87-50f2-56ce-1db7-5a1dc005a798-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-02-15 18:16                     ` Jason Ekstrand
     [not found]                       ` <CAOFGe9611MqmsvdvZS4_vuJjrrUAmjK5-41Z6tpaxTHJsB8CwA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-02-15 18:33                         ` Koenig, Christian
     [not found]                           ` <f933f9ec-6e69-f9df-f12f-5f1844a2ad37-5C7GfCeVMHo@public.gmane.org>
2019-02-15 19:11                             ` Jason Ekstrand
2018-12-07  9:55 ` [PATCH 03/11] drm/syncobj: remove drm_syncobj_cb and cleanup Chunming Zhou
2018-12-07  9:55 ` [PATCH 05/11] drm/syncobj: add support for timeline point wait v8 Chunming Zhou
2018-12-07  9:55 ` [PATCH 08/11] drm/amdgpu: add timeline support in amdgpu CS v2 Chunming Zhou
     [not found] ` <20181207095601.2058-1-david1.zhou-5C7GfCeVMHo@public.gmane.org>
2018-12-07  9:55   ` [PATCH 04/11] drm/syncobj: add new drm_syncobj_add_point interface v2 Chunming Zhou
2018-12-07  9:55   ` [PATCH 06/11] drm/syncobj: add timeline payload query ioctl v4 Chunming Zhou
     [not found]     ` <20181207095601.2058-6-david1.zhou-5C7GfCeVMHo@public.gmane.org>
2019-02-15 19:31       ` Lionel Landwerlin via amd-gfx
     [not found]         ` <157f8231-57e2-0492-de5d-f9ba4761c4c9-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2019-02-16 19:22           ` Christian König via amd-gfx
     [not found]             ` <a24728a8-5b80-e746-a1f2-6555cd817e99-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-02-18  3:10               ` zhoucm1
     [not found]                 ` <eae060f6-6493-ef71-ed9d-52d7dd768b03-5C7GfCeVMHo@public.gmane.org>
2019-02-18  7:28                   ` Koenig, Christian
     [not found]                     ` <4becddef-3bb3-5a66-34d4-95cced896939-5C7GfCeVMHo@public.gmane.org>
2019-02-18 11:40                       ` Lionel Landwerlin
2018-12-07  9:55   ` [PATCH 07/11] drm/syncobj: use the timeline point in drm_syncobj_find_fence v3 Chunming Zhou
2018-12-07  9:55   ` [PATCH 09/11] drm/syncobj: add transition iotcls between binary and timeline Chunming Zhou
2018-12-07 11:28     ` Koenig, Christian
     [not found]     ` <20181207095601.2058-9-david1.zhou-5C7GfCeVMHo@public.gmane.org>
2019-02-15 14:28       ` Lionel Landwerlin via amd-gfx
2019-02-18 10:35         ` zhoucm1
2019-02-18 11:01           ` Koenig, Christian
     [not found]             ` <27a38e11-0c77-4340-aac9-b02e816c6f58-2ueSQiBKiTY7tOexoI0I+QC/G2K4zDHf@public.gmane.org>
2019-02-18 12:07               ` Lionel Landwerlin
     [not found]                 ` <83890a08-769a-b52a-f2f6-9fe425f2562c-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2019-02-18 17:01                   ` Koenig, Christian
     [not found]                     ` <64c548d0-b062-f937-30a5-5a4d3f296f91-5C7GfCeVMHo@public.gmane.org>
2019-02-19 10:46                       ` zhoucm1
     [not found]                         ` <c2c12849-d26b-3212-40ca-682d6f8006fa-5C7GfCeVMHo@public.gmane.org>
2019-02-19 11:29                           ` Lionel Landwerlin
2019-02-19 11:32                           ` Koenig, Christian
2019-02-20  4:53                             ` zhoucm1
2019-02-20  7:59                               ` Koenig, Christian
     [not found]                                 ` <976d7032-1cde-0427-ce56-38c2ac8881ec-5C7GfCeVMHo@public.gmane.org>
2019-02-20  8:10                                   ` zhoucm1
     [not found]                                     ` <730eaa42-d852-e9d8-7756-43fb256a466f-5C7GfCeVMHo@public.gmane.org>
2019-02-20  8:24                                       ` Koenig, Christian
2018-12-07  9:56   ` [PATCH 11/11] drm/amdgpu: update version for timeline syncobj support in amdgpu Chunming Zhou
2018-12-07  9:56 ` [PATCH 10/11] drm/syncobj: add timeline signal ioctl for syncobj Chunming Zhou
     [not found]   ` <20181207095601.2058-10-david1.zhou-5C7GfCeVMHo@public.gmane.org>
2018-12-07 11:31     ` Christian König
2018-12-07 13:09       ` Chunming Zhou
     [not found]         ` <8c34aaf0-13b3-4070-f4ef-076fe1ab3197-5C7GfCeVMHo@public.gmane.org>
2018-12-07 13:14           ` Koenig, Christian
  -- strict thread matches above, loose matches on Subject: below --
2018-11-28 14:50 restart syncobj timeline changes v2 Christian König
2018-11-28 14:50 ` [PATCH 11/11] drm/amdgpu: update version for timeline syncobj support in amdgpu Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.