* [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling
From: Christian König @ 2021-06-11 12:02 UTC
  To: daniel, dri-devel, amd-gfx

As the name implies, if testing all fences is requested we should
indeed test all fences and not skip the exclusive one just because
shared ones are present.
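
A minimal sketch of the resulting semantics, assuming resv is an
already set up struct dma_resv * (hypothetical caller code, not part
of the patch):

	#include <linux/dma-resv.h>

	/*
	 * test_all == true now really checks every fence: all shared
	 * fences and, with this fix, always the exclusive one as well.
	 */
	bool idle = dma_resv_test_signaled(resv, true);

	/*
	 * test_all == false still checks only the exclusive fence,
	 * which is enough if the caller just cares about writers.
	 */
	bool no_writer = dma_resv_test_signaled(resv, false);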

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index f26c71747d43..c66bfdde9454 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
  */
 bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
 {
-	unsigned int seq, shared_count;
+	struct dma_fence *fence;
+	unsigned int seq;
 	int ret;
 
 	rcu_read_lock();
 retry:
 	ret = true;
-	shared_count = 0;
 	seq = read_seqcount_begin(&obj->seq);
 
 	if (test_all) {
 		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
-		unsigned int i;
-
-		if (fobj)
-			shared_count = fobj->shared_count;
+		unsigned int i, shared_count;
 
+		shared_count = fobj ? fobj->shared_count : 0;
 		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence;
-
 			fence = rcu_dereference(fobj->shared[i]);
 			ret = dma_resv_test_signaled_single(fence);
 			if (ret < 0)
@@ -641,24 +637,19 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
 			else if (!ret)
 				break;
 		}
-
-		if (read_seqcount_retry(&obj->seq, seq))
-			goto retry;
 	}
 
-	if (!shared_count) {
-		struct dma_fence *fence_excl = dma_resv_excl_fence(obj);
-
-		if (fence_excl) {
-			ret = dma_resv_test_signaled_single(fence_excl);
-			if (ret < 0)
-				goto retry;
+	fence = dma_resv_excl_fence(obj);
+	if (fence) {
+		ret = dma_resv_test_signaled_single(fence);
+		if (ret < 0)
+			goto retry;
 
-			if (read_seqcount_retry(&obj->seq, seq))
-				goto retry;
-		}
 	}
 
+	if (read_seqcount_retry(&obj->seq, seq))
+		goto retry;
+
 	rcu_read_unlock();
 	return ret;
 }
-- 
2.25.1


* [PATCH 2/5] dma-buf: some dma_fence_chain improvements
From: Christian König @ 2021-06-11 12:02 UTC
  To: daniel, dri-devel, amd-gfx

The callback and the irq work are never used at the same time. Putting
them into a union saves 24 bytes and shrinks the structure to 120 bytes.
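
The handover between the two members is strictly sequential, roughly
like this (a sketch of the signaling path, mirroring the callback
below):

	/*
	 * While waiting, the union slot holds the dma_fence_cb armed
	 * via dma_fence_add_callback().  Once the callback has fired
	 * it is never touched again, so its bytes can be reused: the
	 * callback initializes the irq_work in place and queues it to
	 * finish signaling outside of the fence lock.
	 */
	chain = container_of(cb, typeof(*chain), cb);
	init_irq_work(&chain->work, dma_fence_chain_irq_work);
	irq_work_queue(&chain->work);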

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-fence-chain.c |  2 +-
 include/linux/dma-fence-chain.h   | 27 +++++++++++++++++++++------
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c
index 7d129e68ac70..1b4cb3e5cec9 100644
--- a/drivers/dma-buf/dma-fence-chain.c
+++ b/drivers/dma-buf/dma-fence-chain.c
@@ -137,6 +137,7 @@ static void dma_fence_chain_cb(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct dma_fence_chain *chain;
 
 	chain = container_of(cb, typeof(*chain), cb);
+	init_irq_work(&chain->work, dma_fence_chain_irq_work);
 	irq_work_queue(&chain->work);
 	dma_fence_put(f);
 }
@@ -239,7 +240,6 @@ void dma_fence_chain_init(struct dma_fence_chain *chain,
 	rcu_assign_pointer(chain->prev, prev);
 	chain->fence = fence;
 	chain->prev_seqno = 0;
-	init_irq_work(&chain->work, dma_fence_chain_irq_work);
 
 	/* Try to reuse the context of the previous chain node. */
 	if (prev_chain && __dma_fence_is_later(seqno, prev->seqno, prev->ops)) {
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index 10462a029da2..c6eb3aa45668 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -16,21 +16,36 @@
 /**
  * struct dma_fence_chain - fence to represent an node of a fence chain
  * @base: fence base class
- * @lock: spinlock for fence handling
  * @prev: previous fence of the chain
  * @prev_seqno: original previous seqno before garbage collection
  * @fence: encapsulated fence
- * @cb: callback structure for signaling
- * @work: irq work item for signaling
+ * @lock: spinlock for fence handling
  */
 struct dma_fence_chain {
 	struct dma_fence base;
-	spinlock_t lock;
 	struct dma_fence __rcu *prev;
 	u64 prev_seqno;
 	struct dma_fence *fence;
-	struct dma_fence_cb cb;
-	struct irq_work work;
+	union {
+		/**
+		 * @cb: callback for signaling
+		 *
+		 * This is used to add the callback for signaling the
+		 * completion of the fence chain. Never used at the same time
+		 * as the irq work.
+		 */
+		struct dma_fence_cb cb;
+
+		/**
+		 * @work: irq work item for signaling
+		 *
+		 * Irq work structure to allow us to add the callback without
+		 * running into lock inversion. Never used at the same time as
+		 * the callback.
+		 */
+		struct irq_work work;
+	};
+	spinlock_t lock;
 };
 
 extern const struct dma_fence_ops dma_fence_chain_ops;
-- 
2.25.1


* [PATCH 3/5] dma-buf: add dma_fence_chain_alloc/free v2
From: Christian König @ 2021-06-11 12:02 UTC
  To: daniel, dri-devel, amd-gfx

Add a common allocation helper, cleaning up the mix of kzalloc/kmalloc
and removing some unused code in the selftest.

v2: polish kernel doc a bit
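
Typical caller pattern with the new helpers (a sketch; setup_failed
stands in for whatever can go wrong before the node is initialized):

	struct dma_fence_chain *chain = dma_fence_chain_alloc();

	if (!chain)
		return -ENOMEM;

	if (setup_failed) {
		/* never initialized nor published: plain kfree, no RCU */
		dma_fence_chain_free(chain);
		return -EINVAL;
	}

	/* from here on the node is owned by the fence machinery */
	dma_fence_chain_init(chain, dma_fence_get(prev),
			     dma_fence_get(fence), seqno);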

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/st-dma-fence-chain.c          | 16 ++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  4 ++--
 drivers/gpu/drm/drm_syncobj.c                 |  6 ++---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++---
 drivers/gpu/drm/msm/msm_gem_submit.c          |  6 ++---
 include/linux/dma-fence-chain.h               | 23 +++++++++++++++++++
 6 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
index 9525f7f56119..8ce1ea59d31b 100644
--- a/drivers/dma-buf/st-dma-fence-chain.c
+++ b/drivers/dma-buf/st-dma-fence-chain.c
@@ -58,28 +58,20 @@ static struct dma_fence *mock_fence(void)
 	return &f->base;
 }
 
-static inline struct mock_chain {
-	struct dma_fence_chain base;
-} *to_mock_chain(struct dma_fence *f) {
-	return container_of(f, struct mock_chain, base.base);
-}
-
 static struct dma_fence *mock_chain(struct dma_fence *prev,
 				    struct dma_fence *fence,
 				    u64 seqno)
 {
-	struct mock_chain *f;
+	struct dma_fence_chain *f;
 
-	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	f = dma_fence_chain_alloc();
 	if (!f)
 		return NULL;
 
-	dma_fence_chain_init(&f->base,
-			     dma_fence_get(prev),
-			     dma_fence_get(fence),
+	dma_fence_chain_init(f, dma_fence_get(prev), dma_fence_get(fence),
 			     seqno);
 
-	return &f->base.base;
+	return &f->base;
 }
 
 static int sanitycheck(void *arg)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 90136f9dedd6..325e82621467 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1124,7 +1124,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
 
 		dep->chain = NULL;
 		if (syncobj_deps[i].point) {
-			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
+			dep->chain = dma_fence_chain_alloc();
 			if (!dep->chain)
 				return -ENOMEM;
 		}
@@ -1132,7 +1132,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
 		dep->syncobj = drm_syncobj_find(p->filp,
 						syncobj_deps[i].handle);
 		if (!dep->syncobj) {
-			kfree(dep->chain);
+			dma_fence_chain_free(dep->chain);
 			return -EINVAL;
 		}
 		dep->point = syncobj_deps[i].point;
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index fdd2ec87cdd1..1c5b9ef6da37 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -861,7 +861,7 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
 				     &fence);
 	if (ret)
 		goto err;
-	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+	chain = dma_fence_chain_alloc();
 	if (!chain) {
 		ret = -ENOMEM;
 		goto err1;
@@ -1402,10 +1402,10 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 		goto err_points;
 	}
 	for (i = 0; i < args->count_handles; i++) {
-		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+		chains[i] = dma_fence_chain_alloc();
 		if (!chains[i]) {
 			for (j = 0; j < i; j++)
-				kfree(chains[j]);
+				dma_fence_chain_free(chains[j]);
 			ret = -ENOMEM;
 			goto err_chains;
 		}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 66789111a24b..a22cb86730b3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2983,7 +2983,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n)
 	while (n--) {
 		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
 		dma_fence_put(fences[n].dma_fence);
-		kfree(fences[n].chain_fence);
+		dma_fence_chain_free(fences[n].chain_fence);
 	}
 	kvfree(fences);
 }
@@ -3097,9 +3097,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
 				return -EINVAL;
 			}
 
-			f->chain_fence =
-				kmalloc(sizeof(*f->chain_fence),
-					GFP_KERNEL);
+			f->chain_fence = dma_fence_chain_alloc();
 			if (!f->chain_fence) {
 				drm_syncobj_put(syncobj);
 				dma_fence_put(fence);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 5480852bdeda..6ff6df6c4791 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -586,9 +586,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
 				break;
 			}
 
-			post_deps[i].chain =
-				kmalloc(sizeof(*post_deps[i].chain),
-				        GFP_KERNEL);
+			post_deps[i].chain = dma_fence_chain_alloc();
 			if (!post_deps[i].chain) {
 				ret = -ENOMEM;
 				break;
@@ -605,7 +603,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
 
 	if (ret) {
 		for (j = 0; j <= i; ++j) {
-			kfree(post_deps[j].chain);
+			dma_fence_chain_free(post_deps[j].chain);
 			if (post_deps[j].syncobj)
 				drm_syncobj_put(post_deps[j].syncobj);
 		}
diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
index c6eb3aa45668..7ec36d850363 100644
--- a/include/linux/dma-fence-chain.h
+++ b/include/linux/dma-fence-chain.h
@@ -12,6 +12,7 @@
 
 #include <linux/dma-fence.h>
 #include <linux/irq_work.h>
+#include <linux/slab.h>
 
 /**
  * struct dma_fence_chain - fence to represent an node of a fence chain
@@ -66,6 +67,28 @@ to_dma_fence_chain(struct dma_fence *fence)
 	return container_of(fence, struct dma_fence_chain, base);
 }
 
+/**
+ * dma_fence_chain_alloc
+ *
+ * Returns a new dma_fence_chain object or NULL on failure.
+ */
+static inline struct dma_fence_chain *dma_fence_chain_alloc(void)
+{
+	return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+};
+
+/**
+ * dma_fence_chain_free
+ * @chain: chain node to free
+ *
+ * Frees up an allocated but not used dma_fence_chain node. This doesn't need
+ * an RCU grace period since the fence was never initialized nor published.
+ */
+static inline void dma_fence_chain_free(struct dma_fence_chain *chain)
+{
+	kfree(chain);
+};
+
 /**
  * dma_fence_chain_for_each - iterate over all fences in chain
  * @iter: current fence
-- 
2.25.1


* [PATCH 4/5] drm/amdgpu: unwrap fence chains in the explicit sync fence
From: Christian König @ 2021-06-11 12:03 UTC
  To: daniel, dri-devel, amd-gfx

Unwrap the explicit fence if it is a dma_fence_chain and
sync to the first fence not matching the owner rules.
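
The walk this implements looks roughly like the following (a sketch;
should_sync() stands in for the owner check added below):

	struct dma_fence_chain *chain;

	dma_fence_chain_for_each(f, f) {
		chain = to_dma_fence_chain(f);

		/*
		 * A chain link wraps its payload in chain->fence; the
		 * last element of the walk may be a plain, unwrapped
		 * fence for which to_dma_fence_chain() returns NULL.
		 */
		if (should_sync(chain ? chain->fence : f)) {
			/* the iterator holds a reference, drop it
			 * when breaking out of the loop early */
			dma_fence_put(f);
			break;
		}
	}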

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
 1 file changed, 68 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 1b2ceccaf5b0..862eb3c1c4c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -28,6 +28,8 @@
  *    Christian König <christian.koenig@amd.com>
  */
 
+#include <linux/dma-fence-chain.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
 	return amdgpu_sync_fence(sync, fence);
 }
 
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+				   enum amdgpu_sync_mode mode,
+				   void *owner, struct dma_fence *f)
+{
+	void *fence_owner = amdgpu_sync_get_owner(f);
+
+	/* Always sync to moves, no matter what */
+	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+		return true;
+
+	/* We only want to trigger KFD eviction fences on
+	 * evict or move jobs. Skip KFD fences otherwise.
+	 */
+	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+		return false;
+
+	/* Never sync to VM updates either. */
+	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+		return false;
+
+	/* Ignore fences depending on the sync mode */
+	switch (mode) {
+	case AMDGPU_SYNC_ALWAYS:
+		return true;
+
+	case AMDGPU_SYNC_NE_OWNER:
+		if (amdgpu_sync_same_dev(adev, f) &&
+		    fence_owner == owner)
+			return false;
+		break;
+
+	case AMDGPU_SYNC_EQ_OWNER:
+		if (amdgpu_sync_same_dev(adev, f) &&
+		    fence_owner != owner)
+			return false;
+		break;
+
+	case AMDGPU_SYNC_EXPLICIT:
+		return false;
+	}
+
+	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+	     "Adding eviction fence to sync obj");
+	return true;
+}
+
 /**
  * amdgpu_sync_resv - sync to a reservation object
  *
@@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 
 	/* always sync to the exclusive fence */
 	f = dma_resv_excl_fence(resv);
-	r = amdgpu_sync_fence(sync, f);
+	dma_fence_chain_for_each(f, f) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(f);
+
+		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
+					   chain->fence : f)) {
+			r = amdgpu_sync_fence(sync, f);
+			dma_fence_put(f);
+			if (r)
+				return r;
+			break;
+		}
+	}
 
 	flist = dma_resv_shared_list(resv);
-	if (!flist || r)
-		return r;
+	if (!flist)
+		return 0;
 
 	for (i = 0; i < flist->shared_count; ++i) {
-		void *fence_owner;
-
 		f = rcu_dereference_protected(flist->shared[i],
 					      dma_resv_held(resv));
 
-		fence_owner = amdgpu_sync_get_owner(f);
-
-		/* Always sync to moves, no matter what */
-		if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
+		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
 			r = amdgpu_sync_fence(sync, f);
 			if (r)
-				break;
-		}
-
-		/* We only want to trigger KFD eviction fences on
-		 * evict or move jobs. Skip KFD fences otherwise.
-		 */
-		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
-		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
-			continue;
-
-		/* Never sync to VM updates either. */
-		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
-		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
-			continue;
-
-		/* Ignore fences depending on the sync mode */
-		switch (mode) {
-		case AMDGPU_SYNC_ALWAYS:
-			break;
-
-		case AMDGPU_SYNC_NE_OWNER:
-			if (amdgpu_sync_same_dev(adev, f) &&
-			    fence_owner == owner)
-				continue;
-			break;
-
-		case AMDGPU_SYNC_EQ_OWNER:
-			if (amdgpu_sync_same_dev(adev, f) &&
-			    fence_owner != owner)
-				continue;
-			break;
-
-		case AMDGPU_SYNC_EXPLICIT:
-			continue;
+				return r;
 		}
-
-		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
-		     "Adding eviction fence to sync obj");
-		r = amdgpu_sync_fence(sync, f);
-		if (r)
-			break;
 	}
-	return r;
+	return 0;
 }
 
 /**
-- 
2.25.1


* [PATCH 5/5] drm/amdgpu: rework dma_resv handling v2
From: Christian König @ 2021-06-11 12:03 UTC
  To: daniel, dri-devel, amd-gfx

Drop the workaround and instead implement a better solution.

Basically we are now chaining all submissions using a dma_fence_chain
container and adding them as the exclusive fence to the dma_resv object.

This way other drivers can still sync to the single exclusive fence
while amdgpu only syncs to fences from different processes.

v2: polish kerneldoc a bit
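
After a few submissions the exclusive slot then looks roughly like
this (a sketch; A, B and C are consecutive command submissions):

	resv->fence_excl --> chain(C) --> chain(B) --> chain(A)

	/*
	 * Per submission the old exclusive fence becomes the previous
	 * link and the new link is published in its place.
	 */
	dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
			     dma_fence_get(p->fence), 1);
	rcu_assign_pointer(resv->fence_excl, &chain->base);

Foreign drivers wait on the chain head and thus on every submission,
while amdgpu unwraps the chain (see patch 4) and skips its own fences.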

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 54 +++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
 6 files changed, 47 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a130e766cbdb..c905a4cfc173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
 struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
+	struct dma_fence_chain		*chain;
 	uint32_t			priority;
 	struct page			**user_pages;
 	bool				user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 325e82621467..f6f3029f958d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -587,6 +587,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto out;
 	}
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+			e->chain = dma_fence_chain_alloc();
+			if (!e->chain) {
+				r = -ENOMEM;
+				goto error_validate;
+			}
+		}
+	}
+
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
 					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
@@ -614,15 +628,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-		/* Make sure we use the exclusive slot for shared BOs */
-		if (bo->prime_shared_count)
-			e->tv.num_shared = 0;
-		e->bo_va = amdgpu_vm_bo_find(vm, bo);
-	}
-
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -644,8 +649,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 error_validate:
-	if (r)
+	if (r) {
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			dma_fence_chain_free(e->chain);
+			e->chain = NULL;
+		}
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+	}
 out:
 	return r;
 }
@@ -685,9 +695,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 {
 	unsigned i;
 
-	if (error && backoff)
+	if (error && backoff) {
+		struct amdgpu_bo_list_entry *e;
+
+		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+			dma_fence_chain_free(e->chain);
+			e->chain = NULL;
+		}
+
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
+	}
 
 	for (i = 0; i < parser->num_post_deps; i++) {
 		drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1260,6 +1278,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_resv *resv = e->tv.bo->base.resv;
+		struct dma_fence_chain *chain = e->chain;
+
+		if (!chain)
+			continue;
+
+		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+				     dma_fence_get(p->fence), 1);
+
+		rcu_assign_pointer(resv->fence_excl, &chain->base);
+		e->chain = NULL;
+	}
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	mutex_unlock(&p->adev->notifier_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c3053b83b80c..23219fc3b28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,48 +42,6 @@
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
 
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
-	struct dma_fence **fences;
-	unsigned int count;
-	int r;
-
-	if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
-		return 0;
-
-	r = dma_resv_get_fences(obj, NULL, &count, &fences);
-	if (r)
-		return r;
-
-	if (count == 0) {
-		/* Now that was unexpected. */
-	} else if (count == 1) {
-		dma_resv_add_excl_fence(obj, fences[0]);
-		dma_fence_put(fences[0]);
-		kfree(fences);
-	} else {
-		struct dma_fence_array *array;
-
-		array = dma_fence_array_create(count, fences,
-					       dma_fence_context_alloc(1), 0,
-					       false);
-		if (!array)
-			goto err_fences_put;
-
-		dma_resv_add_excl_fence(obj, &array->base);
-		dma_fence_put(&array->base);
-	}
-
-	return 0;
-
-err_fences_put:
-	while (count--)
-		dma_fence_put(fences[count]);
-	kfree(fences);
-	return -ENOMEM;
-}
-
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
  *
@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 	if (r < 0)
 		goto out;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (unlikely(r != 0))
-		goto out;
-
-	/*
-	 * We only create shared fences for internal use, but importers
-	 * of the dmabuf rely on exclusive fences for implicitly
-	 * tracking write hazards. As any of the current fences may
-	 * correspond to a write, we need to convert all existing
-	 * fences on the reservation object into a single exclusive
-	 * fence.
-	 */
-	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
-	if (r)
-		goto out;
-
-	bo->prime_shared_count++;
-	amdgpu_bo_unreserve(bo);
 	return 0;
 
 out:
@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-		bo->prime_shared_count--;
-
 	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
 }
@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 	bo = gem_to_amdgpu_bo(gobj);
 	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (dma_buf->ops != &amdgpu_dmabuf_ops)
-		bo->prime_shared_count = 1;
 
 	dma_resv_unlock(resv);
 	return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 1c3e3b608332..5d82120fc160 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case AMDGPU_GEM_OP_SET_PLACEMENT:
-		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+		if (robj->tbo.base.import_attach &&
+		    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
 			r = -EINVAL;
 			amdgpu_bo_unreserve(robj);
 			break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 96447e1d4c9c..30951b593809 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -871,7 +871,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		return -EINVAL;
 
 	/* A shared bo cannot be migrated to VRAM */
-	if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+	if (bo->tbo.base.import_attach) {
 		if (domain & AMDGPU_GEM_DOMAIN_GTT)
 			domain = AMDGPU_GEM_DOMAIN_GTT;
 		else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index b35962702278..55d7e90eaa72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -98,7 +98,6 @@ struct amdgpu_bo {
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
 	u64				flags;
-	unsigned			prime_shared_count;
 	/* per VM structure for page tables and with virtual addresses */
 	struct amdgpu_vm_bo_base	*vm_bo;
 	/* Constant after initialization */
-- 
2.25.1


* Re: [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling
From: Daniel Vetter @ 2021-06-11 14:47 UTC
  To: Christian König; +Cc: amd-gfx, dri-devel

On Fri, Jun 11, 2021 at 02:02:57PM +0200, Christian König wrote:
> As the name implies, if testing all fences is requested we should
> indeed test all fences and not skip the exclusive one just because
> shared ones are present.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Hm, I thought we had the rule that when both fences exist, the shared
ones must collectively signal no earlier than the exclusive one.

That's at least the contract we've implemented in dma_resv.h. But I've
also found a bunch of drivers that are a lot more yolo on this.
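
In other words, the old !shared_count shortcut is only correct if an
invariant along these lines holds (all_shared_signaled() is just a
hypothetical helper for illustration):

	struct dma_fence *excl = dma_resv_excl_fence(resv);

	/*
	 * Claimed rule: once all shared fences have signaled, the
	 * exclusive fence must have signaled too -- only then may
	 * test_all skip the exclusive fence when shared ones exist.
	 */
	if (excl && all_shared_signaled(resv))
		WARN_ON(!dma_fence_is_signaled(excl));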

I think there's a solid case here to just always take all the fences if we
ask for all the shared ones, but if we go that way then I'd suggest:
- a clear kerneldoc patch to really hammer this in (currently we're not
  good at all in this regard)
- going through the drivers a bit to check for this (I have some of that
  done already in my earlier series; need to respin it and send it out)

But I'm kinda not seeing why this needs to be in this patch series here.
-Daniel

> ---
>  drivers/dma-buf/dma-resv.c | 33 ++++++++++++---------------------
>  1 file changed, 12 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index f26c71747d43..c66bfdde9454 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
>   */
>  bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
>  {
> -	unsigned int seq, shared_count;
> +	struct dma_fence *fence;
> +	unsigned int seq;
>  	int ret;
>  
>  	rcu_read_lock();
>  retry:
>  	ret = true;
> -	shared_count = 0;
>  	seq = read_seqcount_begin(&obj->seq);
>  
>  	if (test_all) {
>  		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
> -		unsigned int i;
> -
> -		if (fobj)
> -			shared_count = fobj->shared_count;
> +		unsigned int i, shared_count;
>  
> +		shared_count = fobj ? fobj->shared_count : 0;
>  		for (i = 0; i < shared_count; ++i) {
> -			struct dma_fence *fence;
> -
>  			fence = rcu_dereference(fobj->shared[i]);
>  			ret = dma_resv_test_signaled_single(fence);
>  			if (ret < 0)
> @@ -641,24 +637,19 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
>  			else if (!ret)
>  				break;
>  		}
> -
> -		if (read_seqcount_retry(&obj->seq, seq))
> -			goto retry;
>  	}
>  
> -	if (!shared_count) {
> -		struct dma_fence *fence_excl = dma_resv_excl_fence(obj);
> -
> -		if (fence_excl) {
> -			ret = dma_resv_test_signaled_single(fence_excl);
> -			if (ret < 0)
> -				goto retry;
> +	fence = dma_resv_excl_fence(obj);
> +	if (fence) {
> +		ret = dma_resv_test_signaled_single(fence);
> +		if (ret < 0)
> +			goto retry;
>  
> -			if (read_seqcount_retry(&obj->seq, seq))
> -				goto retry;
> -		}
>  	}
>  
> +	if (read_seqcount_retry(&obj->seq, seq))
> +		goto retry;
> +
>  	rcu_read_unlock();
>  	return ret;
>  }
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 3/5] dma-buf: add dma_fence_chain_alloc/free v2
  2021-06-11 12:02   ` Christian König
@ 2021-06-11 14:52     ` Daniel Vetter
  -1 siblings, 0 replies; 28+ messages in thread
From: Daniel Vetter @ 2021-06-11 14:52 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, dri-devel

On Fri, Jun 11, 2021 at 02:02:59PM +0200, Christian König wrote:
> Add a common allocation helper. Cleaning up the mix of kzalloc/kmalloc
> and some unused code in the selftest.
> 
> v2: polish kernel doc a bit
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Given how absolutely wrong I was, I'm not sure this r-b here is justified :-)

> ---
>  drivers/dma-buf/st-dma-fence-chain.c          | 16 ++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  4 ++--
>  drivers/gpu/drm/drm_syncobj.c                 |  6 ++---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++---
>  drivers/gpu/drm/msm/msm_gem_submit.c          |  6 ++---
>  include/linux/dma-fence-chain.h               | 23 +++++++++++++++++++
>  6 files changed, 36 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
> index 9525f7f56119..8ce1ea59d31b 100644
> --- a/drivers/dma-buf/st-dma-fence-chain.c
> +++ b/drivers/dma-buf/st-dma-fence-chain.c
> @@ -58,28 +58,20 @@ static struct dma_fence *mock_fence(void)
>  	return &f->base;
>  }
>  
> -static inline struct mock_chain {
> -	struct dma_fence_chain base;
> -} *to_mock_chain(struct dma_fence *f) {
> -	return container_of(f, struct mock_chain, base.base);
> -}
> -
>  static struct dma_fence *mock_chain(struct dma_fence *prev,
>  				    struct dma_fence *fence,
>  				    u64 seqno)
>  {
> -	struct mock_chain *f;
> +	struct dma_fence_chain *f;
>  
> -	f = kmalloc(sizeof(*f), GFP_KERNEL);
> +	f = dma_fence_chain_alloc();
>  	if (!f)
>  		return NULL;
>  
> -	dma_fence_chain_init(&f->base,
> -			     dma_fence_get(prev),
> -			     dma_fence_get(fence),
> +	dma_fence_chain_init(f, dma_fence_get(prev), dma_fence_get(fence),
>  			     seqno);
>  
> -	return &f->base.base;
> +	return &f->base;
>  }
>  
>  static int sanitycheck(void *arg)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 90136f9dedd6..325e82621467 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1124,7 +1124,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
>  
>  		dep->chain = NULL;
>  		if (syncobj_deps[i].point) {
> -			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
> +			dep->chain = dma_fence_chain_alloc();
>  			if (!dep->chain)
>  				return -ENOMEM;
>  		}
> @@ -1132,7 +1132,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
>  		dep->syncobj = drm_syncobj_find(p->filp,
>  						syncobj_deps[i].handle);
>  		if (!dep->syncobj) {
> -			kfree(dep->chain);
> +			dma_fence_chain_free(dep->chain);
>  			return -EINVAL;
>  		}
>  		dep->point = syncobj_deps[i].point;
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index fdd2ec87cdd1..1c5b9ef6da37 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -861,7 +861,7 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
>  				     &fence);
>  	if (ret)
>  		goto err;
> -	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +	chain = dma_fence_chain_alloc();
>  	if (!chain) {
>  		ret = -ENOMEM;
>  		goto err1;
> @@ -1402,10 +1402,10 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>  		goto err_points;
>  	}
>  	for (i = 0; i < args->count_handles; i++) {
> -		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +		chains[i] = dma_fence_chain_alloc();
>  		if (!chains[i]) {
>  			for (j = 0; j < i; j++)
> -				kfree(chains[j]);
> +				dma_fence_chain_free(chains[j]);
>  			ret = -ENOMEM;
>  			goto err_chains;
>  		}
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 66789111a24b..a22cb86730b3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -2983,7 +2983,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n)
>  	while (n--) {
>  		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
>  		dma_fence_put(fences[n].dma_fence);
> -		kfree(fences[n].chain_fence);
> +		dma_fence_chain_free(fences[n].chain_fence);
>  	}
>  	kvfree(fences);
>  }
> @@ -3097,9 +3097,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
>  				return -EINVAL;
>  			}
>  
> -			f->chain_fence =
> -				kmalloc(sizeof(*f->chain_fence),
> -					GFP_KERNEL);
> +			f->chain_fence = dma_fence_chain_alloc();
>  			if (!f->chain_fence) {
>  				drm_syncobj_put(syncobj);
>  				dma_fence_put(fence);
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 5480852bdeda..6ff6df6c4791 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -586,9 +586,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
>  				break;
>  			}
>  
> -			post_deps[i].chain =
> -				kmalloc(sizeof(*post_deps[i].chain),
> -				        GFP_KERNEL);
> +			post_deps[i].chain = dma_fence_chain_alloc();
>  			if (!post_deps[i].chain) {
>  				ret = -ENOMEM;
>  				break;
> @@ -605,7 +603,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
>  
>  	if (ret) {
>  		for (j = 0; j <= i; ++j) {
> -			kfree(post_deps[j].chain);
> +			dma_fence_chain_free(post_deps[j].chain);
>  			if (post_deps[j].syncobj)
>  				drm_syncobj_put(post_deps[j].syncobj);
>  		}
> diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
> index c6eb3aa45668..7ec36d850363 100644
> --- a/include/linux/dma-fence-chain.h
> +++ b/include/linux/dma-fence-chain.h
> @@ -12,6 +12,7 @@
>  
>  #include <linux/dma-fence.h>
>  #include <linux/irq_work.h>
> +#include <linux/slab.h>
>  
>  /**
>   * struct dma_fence_chain - fence to represent a node of a fence chain
> @@ -66,6 +67,28 @@ to_dma_fence_chain(struct dma_fence *fence)
>  	return container_of(fence, struct dma_fence_chain, base);
>  }
>  
> +/**
> + * dma_fence_chain_alloc
> + *
> + * Returns a new dma_fence_chain object or NULL on failure.

		struct dma_fence_chain for that hyperlink goodness
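
I.e. something like this (sketch, just spelling out the usual kerneldoc
cross-reference syntax; the &struct prefix is what generates the link):

	/**
	 * dma_fence_chain_alloc - allocate a new fence chain node
	 *
	 * Returns a new &struct dma_fence_chain object or NULL on failure.
	 */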

> + */
> +static inline struct dma_fence_chain *dma_fence_chain_alloc(void)
> +{
> +	return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +};
> +
> +/**
> + * dma_fence_chain_free
> + * @chain: chain node to free
> + *
> + * Frees up an allocated but not used dma_fence_chain node. This doesn't need

Same here.

> + * an RCU grace period since the fence was never initialized nor published.

I'd add even more clarification, like:

"After dma_fence_chain_init() has been called the fence must be released
by calling dma_fence_put(), and not through this function."

That's still a notch too strict (in theory as long as the fence isn't
published anywhere it's all fine), but it keeps the door open for some
validation.
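
Put differently, the intended lifecycle would be roughly this (sketch;
setup_failed, prev, fence, seqno and r are placeholders for whatever the
caller has at hand):

	struct dma_fence_chain *chain = dma_fence_chain_alloc();

	if (!chain)
		return -ENOMEM;

	if (setup_failed) {
		/* never initialized nor published, plain free is ok */
		dma_fence_chain_free(chain);
		return r;
	}

	dma_fence_chain_init(chain, prev, fence, seqno);
	/* from here on it is a live fence, release with dma_fence_put() */
	dma_fence_put(&chain->base);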

With the doc polish:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>


> + */
> +static inline void dma_fence_chain_free(struct dma_fence_chain *chain)
> +{
> +	kfree(chain);
> +};
> +
>  /**
>   * dma_fence_chain_for_each - iterate over all fences in chain
>   * @iter: current fence
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling
  2021-06-11 14:47   ` Daniel Vetter
@ 2021-06-11 14:53     ` Christian König
  -1 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-06-11 14:53 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: amd-gfx, dri-devel



On 11.06.21 at 16:47, Daniel Vetter wrote:
> On Fri, Jun 11, 2021 at 02:02:57PM +0200, Christian König wrote:
>> As the name implies if testing all fences is requested we
>> should indeed test all fences and not skip the exclusive
>> one because we see shared ones.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Hm I thought we've had the rule that when both fences exist, then
> collectively the shared ones must signal no earlier than the exclusive
> one.
>
> That's at least the contract we've implemented in dma_resv.h. But I've
> also found a bunch of drivers who are a lot more yolo on this.
>
> I think there's a solid case here to just always take all the fences if we
> ask for all the shared ones, but if we go that way then I'd say
> - clear kerneldoc patch to really hammer this in (currently we're not good
>    at all in this regard)
> - going through drivers a bit to check for this (I have some of that done
>    already in my earlier series, need to respin it and send it out)
>
> But I'm kinda not seeing why this needs to be in this patch series here.

You mentioned that this is a problem in the last patch, and if you ask me 
that's just a bug, or at least very inconsistent.

See dma_resv_wait_timeout() always waits for all fences, including the 
exclusive one even if shared ones are present. But 
dma_resv_test_signaled() ignores the exclusive one if shared ones are 
present.
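
I.e. with shared fences present and an unsignaled exclusive fence you can 
currently get this (sketch, using the real signatures):

	/* blocks until the exclusive fence has signaled as well */
	long t = dma_resv_wait_timeout(obj, true, false,
				       MAX_SCHEDULE_TIMEOUT);

	/* ...while this may already have returned true, because the
	 * exclusive fence is skipped as soon as shared fences exist */
	bool s = dma_resv_test_signaled(obj, true);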

The only other driver I could find trying to make use of this is nouveau 
and I already provided a fix for this as well.

I just think that this is the more defensive way to fix it, and it at least 
makes the core functions consistent in their handling.

Christian.

> -Daniel
>
>> ---
>>   drivers/dma-buf/dma-resv.c | 33 ++++++++++++---------------------
>>   1 file changed, 12 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
>> index f26c71747d43..c66bfdde9454 100644
>> --- a/drivers/dma-buf/dma-resv.c
>> +++ b/drivers/dma-buf/dma-resv.c
>> @@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
>>    */
>>   bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
>>   {
>> -	unsigned int seq, shared_count;
>> +	struct dma_fence *fence;
>> +	unsigned int seq;
>>   	int ret;
>>   
>>   	rcu_read_lock();
>>   retry:
>>   	ret = true;
>> -	shared_count = 0;
>>   	seq = read_seqcount_begin(&obj->seq);
>>   
>>   	if (test_all) {
>>   		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
>> -		unsigned int i;
>> -
>> -		if (fobj)
>> -			shared_count = fobj->shared_count;
>> +		unsigned int i, shared_count;
>>   
>> +		shared_count = fobj ? fobj->shared_count : 0;
>>   		for (i = 0; i < shared_count; ++i) {
>> -			struct dma_fence *fence;
>> -
>>   			fence = rcu_dereference(fobj->shared[i]);
>>   			ret = dma_resv_test_signaled_single(fence);
>>   			if (ret < 0)
>> @@ -641,24 +637,19 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
>>   			else if (!ret)
>>   				break;
>>   		}
>> -
>> -		if (read_seqcount_retry(&obj->seq, seq))
>> -			goto retry;
>>   	}
>>   
>> -	if (!shared_count) {
>> -		struct dma_fence *fence_excl = dma_resv_excl_fence(obj);
>> -
>> -		if (fence_excl) {
>> -			ret = dma_resv_test_signaled_single(fence_excl);
>> -			if (ret < 0)
>> -				goto retry;
>> +	fence = dma_resv_excl_fence(obj);
>> +	if (fence) {
>> +		ret = dma_resv_test_signaled_single(fence);
>> +		if (ret < 0)
>> +			goto retry;
>>   
>> -			if (read_seqcount_retry(&obj->seq, seq))
>> -				goto retry;
>> -		}
>>   	}
>>   
>> +	if (read_seqcount_retry(&obj->seq, seq))
>> +		goto retry;
>> +
>>   	rcu_read_unlock();
>>   	return ret;
>>   }
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 3/5] dma-buf: add dma_fence_chain_alloc/free v2
  2021-06-11 14:52     ` Daniel Vetter
@ 2021-06-11 14:54       ` Christian König
  -1 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-06-11 14:54 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: amd-gfx, dri-devel



On 11.06.21 at 16:52, Daniel Vetter wrote:
> On Fri, Jun 11, 2021 at 02:02:59PM +0200, Christian König wrote:
>> Add a common allocation helper. Cleaning up the mix of kzalloc/kmalloc
>> and some unused code in the selftest.
>>
>> v2: polish kernel doc a bit
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Given how absolutely wrong I was I'm not sure this r-b here is justified
> :-)

Oops, that was also not added intentionally. It's just too hot in my flat 
and too few hours till the weekend.

>
>> ---
>>   drivers/dma-buf/st-dma-fence-chain.c          | 16 ++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  4 ++--
>>   drivers/gpu/drm/drm_syncobj.c                 |  6 ++---
>>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  6 ++---
>>   drivers/gpu/drm/msm/msm_gem_submit.c          |  6 ++---
>>   include/linux/dma-fence-chain.h               | 23 +++++++++++++++++++
>>   6 files changed, 36 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/dma-buf/st-dma-fence-chain.c b/drivers/dma-buf/st-dma-fence-chain.c
>> index 9525f7f56119..8ce1ea59d31b 100644
>> --- a/drivers/dma-buf/st-dma-fence-chain.c
>> +++ b/drivers/dma-buf/st-dma-fence-chain.c
>> @@ -58,28 +58,20 @@ static struct dma_fence *mock_fence(void)
>>   	return &f->base;
>>   }
>>   
>> -static inline struct mock_chain {
>> -	struct dma_fence_chain base;
>> -} *to_mock_chain(struct dma_fence *f) {
>> -	return container_of(f, struct mock_chain, base.base);
>> -}
>> -
>>   static struct dma_fence *mock_chain(struct dma_fence *prev,
>>   				    struct dma_fence *fence,
>>   				    u64 seqno)
>>   {
>> -	struct mock_chain *f;
>> +	struct dma_fence_chain *f;
>>   
>> -	f = kmalloc(sizeof(*f), GFP_KERNEL);
>> +	f = dma_fence_chain_alloc();
>>   	if (!f)
>>   		return NULL;
>>   
>> -	dma_fence_chain_init(&f->base,
>> -			     dma_fence_get(prev),
>> -			     dma_fence_get(fence),
>> +	dma_fence_chain_init(f, dma_fence_get(prev), dma_fence_get(fence),
>>   			     seqno);
>>   
>> -	return &f->base.base;
>> +	return &f->base;
>>   }
>>   
>>   static int sanitycheck(void *arg)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 90136f9dedd6..325e82621467 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -1124,7 +1124,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
>>   
>>   		dep->chain = NULL;
>>   		if (syncobj_deps[i].point) {
>> -			dep->chain = kmalloc(sizeof(*dep->chain), GFP_KERNEL);
>> +			dep->chain = dma_fence_chain_alloc();
>>   			if (!dep->chain)
>>   				return -ENOMEM;
>>   		}
>> @@ -1132,7 +1132,7 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
>>   		dep->syncobj = drm_syncobj_find(p->filp,
>>   						syncobj_deps[i].handle);
>>   		if (!dep->syncobj) {
>> -			kfree(dep->chain);
>> +			dma_fence_chain_free(dep->chain);
>>   			return -EINVAL;
>>   		}
>>   		dep->point = syncobj_deps[i].point;
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index fdd2ec87cdd1..1c5b9ef6da37 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -861,7 +861,7 @@ static int drm_syncobj_transfer_to_timeline(struct drm_file *file_private,
>>   				     &fence);
>>   	if (ret)
>>   		goto err;
>> -	chain = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>> +	chain = dma_fence_chain_alloc();
>>   	if (!chain) {
>>   		ret = -ENOMEM;
>>   		goto err1;
>> @@ -1402,10 +1402,10 @@ drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>>   		goto err_points;
>>   	}
>>   	for (i = 0; i < args->count_handles; i++) {
>> -		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>> +		chains[i] = dma_fence_chain_alloc();
>>   		if (!chains[i]) {
>>   			for (j = 0; j < i; j++)
>> -				kfree(chains[j]);
>> +				dma_fence_chain_free(chains[j]);
>>   			ret = -ENOMEM;
>>   			goto err_chains;
>>   		}
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> index 66789111a24b..a22cb86730b3 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
>> @@ -2983,7 +2983,7 @@ __free_fence_array(struct eb_fence *fences, unsigned int n)
>>   	while (n--) {
>>   		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
>>   		dma_fence_put(fences[n].dma_fence);
>> -		kfree(fences[n].chain_fence);
>> +		dma_fence_chain_free(fences[n].chain_fence);
>>   	}
>>   	kvfree(fences);
>>   }
>> @@ -3097,9 +3097,7 @@ add_timeline_fence_array(struct i915_execbuffer *eb,
>>   				return -EINVAL;
>>   			}
>>   
>> -			f->chain_fence =
>> -				kmalloc(sizeof(*f->chain_fence),
>> -					GFP_KERNEL);
>> +			f->chain_fence = dma_fence_chain_alloc();
>>   			if (!f->chain_fence) {
>>   				drm_syncobj_put(syncobj);
>>   				dma_fence_put(fence);
>> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
>> index 5480852bdeda..6ff6df6c4791 100644
>> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
>> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
>> @@ -586,9 +586,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
>>   				break;
>>   			}
>>   
>> -			post_deps[i].chain =
>> -				kmalloc(sizeof(*post_deps[i].chain),
>> -				        GFP_KERNEL);
>> +			post_deps[i].chain = dma_fence_chain_alloc();
>>   			if (!post_deps[i].chain) {
>>   				ret = -ENOMEM;
>>   				break;
>> @@ -605,7 +603,7 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
>>   
>>   	if (ret) {
>>   		for (j = 0; j <= i; ++j) {
>> -			kfree(post_deps[j].chain);
>> +			dma_fence_chain_free(post_deps[j].chain);
>>   			if (post_deps[j].syncobj)
>>   				drm_syncobj_put(post_deps[j].syncobj);
>>   		}
>> diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h
>> index c6eb3aa45668..7ec36d850363 100644
>> --- a/include/linux/dma-fence-chain.h
>> +++ b/include/linux/dma-fence-chain.h
>> @@ -12,6 +12,7 @@
>>   
>>   #include <linux/dma-fence.h>
>>   #include <linux/irq_work.h>
>> +#include <linux/slab.h>
>>   
>>   /**
>>   * struct dma_fence_chain - fence to represent a node of a fence chain
>> @@ -66,6 +67,28 @@ to_dma_fence_chain(struct dma_fence *fence)
>>   	return container_of(fence, struct dma_fence_chain, base);
>>   }
>>   
>> +/**
>> + * dma_fence_chain_alloc
>> + *
>> + * Returns a new dma_fence_chain object or NULL on failure.
> 		struct dma_fence_chain for that hyperlink goodness
>
>> + */
>> +static inline struct dma_fence_chain *dma_fence_chain_alloc(void)
>> +{
>> +	return kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
>> +};
>> +
>> +/**
>> + * dma_fence_chain_free
>> + * @chain: chain node to free
>> + *
>> + * Frees up an allocated but not used dma_fence_chain node. This doesn't need
> Same here.
>
>> + * an RCU grace period since the fence was never initialized nor published.
> I'd add even more clarification, like:
>
> "After dma_fence_chain_init() has been called the fence must be released
> by calling dma_fence_put(), and not through this function."
>
> That's still a notch too strict (in theory as long as the fence isn't
> published anywhere it's all fine), but it keeps the door open for some
> validation.
>
> With the doc polish:
>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thanks,
Christian.

>
>
>> + */
>> +static inline void dma_fence_chain_free(struct dma_fence_chain *chain)
>> +{
>> +	kfree(chain);
>> +};
>> +
>>   /**
>>    * dma_fence_chain_for_each - iterate over all fences in chain
>>    * @iter: current fence
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling
  2021-06-11 14:53     ` Christian König
@ 2021-06-11 14:55       ` Daniel Vetter
  -1 siblings, 0 replies; 28+ messages in thread
From: Daniel Vetter @ 2021-06-11 14:55 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, dri-devel

On Fri, Jun 11, 2021 at 04:53:11PM +0200, Christian König wrote:
> 
> 
> Am 11.06.21 um 16:47 schrieb Daniel Vetter:
> > On Fri, Jun 11, 2021 at 02:02:57PM +0200, Christian König wrote:
> > > As the name implies if testing all fences is requested we
> > > should indeed test all fences and not skip the exclusive
> > > one because we see shared ones.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Hm I thought we've had the rule that when both fences exist, then
> > collectively the shared ones must signal no earlier than the exclusive
> > one.
> > 
> > That's at least the contract we've implemented in dma_resv.h. But I've
> > also found a bunch of drivers who are a lot more yolo on this.
> > 
> > I think there's a solid case here to just always take all the fences if we
> > ask for all the shared ones, but if we go that way then I'd say
> > - clear kerneldoc patch to really hammer this in (currently we're not good
> >    at all in this regard)
> > - going through drivers a bit to check for this (I have some of that done
> >    already in my earlier series, need to respin it and send it out)
> > 
> > But I'm kinda not seeing why this needs to be in this patch series here.
> 
> You mentioned that this is a problem in the last patch and if you ask me
> that's just a bug or at least very inconsistent.
> 
> See dma_resv_wait_timeout() always waits for all fences, including the
> exclusive one even if shared ones are present. But dma_resv_test_signaled()
> ignores the exclusive one if shared ones are present.

Hm, the only one I thought I'd mentioned is that dma_buf_poll doesn't use
dma_fence_get_rcu_safe where I think it should. Different problem. I think
this is one you spotted.
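
For reference, the pattern I mean for dma_buf_poll is roughly (sketch only):

	struct dma_fence *fence;
	bool signaled = true;

	rcu_read_lock();
	/* retries internally until a reference was really obtained, so
	 * the fence can't be replaced and freed from under us */
	fence = dma_fence_get_rcu_safe(&resv->fence_excl);
	rcu_read_unlock();

	if (fence) {
		signaled = dma_fence_is_signaled(fence);
		dma_fence_put(fence);
	}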

> The only other driver I could find trying to make use of this is nouveau and
> I already provided a fix for this as well.

i915 also does this, and I think I've found a few more.

> I just think that this is the more defensive approach to fix this and have
> at least the core functions consistent on the handling.

Oh, fully agree, it's just that the current dma_resv docs aren't the greatest,
and hacking on semantics without updating the docs isn't great. Especially
when it's ad hoc.
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/dma-buf/dma-resv.c | 33 ++++++++++++---------------------
> > >   1 file changed, 12 insertions(+), 21 deletions(-)
> > > 
> > > diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> > > index f26c71747d43..c66bfdde9454 100644
> > > --- a/drivers/dma-buf/dma-resv.c
> > > +++ b/drivers/dma-buf/dma-resv.c
> > > @@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
> > >    */
> > >   bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
> > >   {
> > > -	unsigned int seq, shared_count;
> > > +	struct dma_fence *fence;
> > > +	unsigned int seq;
> > >   	int ret;
> > >   	rcu_read_lock();
> > >   retry:
> > >   	ret = true;
> > > -	shared_count = 0;
> > >   	seq = read_seqcount_begin(&obj->seq);
> > >   	if (test_all) {
> > >   		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
> > > -		unsigned int i;
> > > -
> > > -		if (fobj)
> > > -			shared_count = fobj->shared_count;
> > > +		unsigned int i, shared_count;
> > > +		shared_count = fobj ? fobj->shared_count : 0;
> > >   		for (i = 0; i < shared_count; ++i) {
> > > -			struct dma_fence *fence;
> > > -
> > >   			fence = rcu_dereference(fobj->shared[i]);
> > >   			ret = dma_resv_test_signaled_single(fence);
> > >   			if (ret < 0)
> > > @@ -641,24 +637,19 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
> > >   			else if (!ret)
> > >   				break;
> > >   		}
> > > -
> > > -		if (read_seqcount_retry(&obj->seq, seq))
> > > -			goto retry;
> > >   	}
> > > -	if (!shared_count) {
> > > -		struct dma_fence *fence_excl = dma_resv_excl_fence(obj);
> > > -
> > > -		if (fence_excl) {
> > > -			ret = dma_resv_test_signaled_single(fence_excl);
> > > -			if (ret < 0)
> > > -				goto retry;
> > > +	fence = dma_resv_excl_fence(obj);
> > > +	if (fence) {
> > > +		ret = dma_resv_test_signaled_single(fence);
> > > +		if (ret < 0)
> > > +			goto retry;
> > > -			if (read_seqcount_retry(&obj->seq, seq))
> > > -				goto retry;
> > > -		}
> > >   	}
> > > +	if (read_seqcount_retry(&obj->seq, seq))
> > > +		goto retry;
> > > +
> > >   	rcu_read_unlock();
> > >   	return ret;
> > >   }
> > > -- 
> > > 2.25.1
> > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: rework dma_resv handling v2
  2021-06-11 12:03   ` Christian König
@ 2021-06-11 14:56     ` Daniel Vetter
  -1 siblings, 0 replies; 28+ messages in thread
From: Daniel Vetter @ 2021-06-11 14:56 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, dri-devel

On Fri, Jun 11, 2021 at 02:03:01PM +0200, Christian König wrote:
> Drop the workaround and instead implement a better solution.
> 
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as the exclusive fence to the dma_resv object.
> 
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only syncs to fences from different processes.
> 
> v2: polish kerneldoc a bit

There is no kerneldoc here, and otherwise this patch looks unchanged from
the previous round. Hit send a bit too quickly?
-Daniel
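
For readers following along: the reason importers keep working unmodified is
that a &struct dma_fence_chain node is itself a dma_fence, so the usual
exclusive-slot pattern on the importing side stays valid. Rough sketch:

	struct dma_fence *excl;
	long r = 0;

	rcu_read_lock();
	excl = dma_fence_get_rcu_safe(&resv->fence_excl);
	rcu_read_unlock();

	if (excl) {
		/* the chain head only signals once all its links have */
		r = dma_fence_wait(excl, false);
		dma_fence_put(excl);
	}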

> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 54 +++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
>  6 files changed, 47 insertions(+), 79 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>  struct amdgpu_bo_list_entry {
>  	struct ttm_validate_buffer	tv;
>  	struct amdgpu_bo_va		*bo_va;
> +	struct dma_fence_chain		*chain;
>  	uint32_t			priority;
>  	struct page			**user_pages;
>  	bool				user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 325e82621467..f6f3029f958d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -587,6 +587,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  		goto out;
>  	}
>  
> +	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> +		e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> +		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> +			e->chain = dma_fence_chain_alloc();
> +			if (!e->chain) {
> +				r = -ENOMEM;
> +				goto error_validate;
> +			}
> +		}
> +	}
> +
>  	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>  					  &p->bytes_moved_vis_threshold);
>  	p->bytes_moved = 0;
> @@ -614,15 +628,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  	gws = p->bo_list->gws_obj;
>  	oa = p->bo_list->oa_obj;
>  
> -	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> -		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> -		/* Make sure we use the exclusive slot for shared BOs */
> -		if (bo->prime_shared_count)
> -			e->tv.num_shared = 0;
> -		e->bo_va = amdgpu_vm_bo_find(vm, bo);
> -	}
> -
>  	if (gds) {
>  		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>  		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -644,8 +649,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  	}
>  
>  error_validate:
> -	if (r)
> +	if (r) {
> +		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +			dma_fence_chain_free(e->chain);
> +			e->chain = NULL;
> +		}
>  		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> +	}
>  out:
>  	return r;
>  }
> @@ -685,9 +695,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>  {
>  	unsigned i;
>  
> -	if (error && backoff)
> +	if (error && backoff) {
> +		struct amdgpu_bo_list_entry *e;
> +
> +		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> +			dma_fence_chain_free(e->chain);
> +			e->chain = NULL;
> +		}
> +
>  		ttm_eu_backoff_reservation(&parser->ticket,
>  					   &parser->validated);
> +	}
>  
>  	for (i = 0; i < parser->num_post_deps; i++) {
>  		drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1260,6 +1278,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  
>  	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>  
> +	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +		struct dma_resv *resv = e->tv.bo->base.resv;
> +		struct dma_fence_chain *chain = e->chain;
> +
> +		if (!chain)
> +			continue;
> +
> +		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> +				     dma_fence_get(p->fence), 1);
> +
> +		rcu_assign_pointer(resv->fence_excl, &chain->base);
> +		e->chain = NULL;
> +	}
> +
>  	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>  	mutex_unlock(&p->adev->notifier_lock);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
>  #include <linux/pci-p2pdma.h>
>  #include <linux/pm_runtime.h>
>  
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> -	struct dma_fence **fences;
> -	unsigned int count;
> -	int r;
> -
> -	if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> -		return 0;
> -
> -	r = dma_resv_get_fences(obj, NULL, &count, &fences);
> -	if (r)
> -		return r;
> -
> -	if (count == 0) {
> -		/* Now that was unexpected. */
> -	} else if (count == 1) {
> -		dma_resv_add_excl_fence(obj, fences[0]);
> -		dma_fence_put(fences[0]);
> -		kfree(fences);
> -	} else {
> -		struct dma_fence_array *array;
> -
> -		array = dma_fence_array_create(count, fences,
> -					       dma_fence_context_alloc(1), 0,
> -					       false);
> -		if (!array)
> -			goto err_fences_put;
> -
> -		dma_resv_add_excl_fence(obj, &array->base);
> -		dma_fence_put(&array->base);
> -	}
> -
> -	return 0;
> -
> -err_fences_put:
> -	while (count--)
> -		dma_fence_put(fences[count]);
> -	kfree(fences);
> -	return -ENOMEM;
> -}
> -
>  /**
>   * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>   *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>  	if (r < 0)
>  		goto out;
>  
> -	r = amdgpu_bo_reserve(bo, false);
> -	if (unlikely(r != 0))
> -		goto out;
> -
> -	/*
> -	 * We only create shared fences for internal use, but importers
> -	 * of the dmabuf rely on exclusive fences for implicitly
> -	 * tracking write hazards. As any of the current fences may
> -	 * correspond to a write, we need to convert all existing
> -	 * fences on the reservation object into a single exclusive
> -	 * fence.
> -	 */
> -	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> -	if (r)
> -		goto out;
> -
> -	bo->prime_shared_count++;
> -	amdgpu_bo_unreserve(bo);
>  	return 0;
>  
>  out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>  
> -	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> -		bo->prime_shared_count--;
> -
>  	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>  	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>  }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>  	bo = gem_to_amdgpu_bo(gobj);
>  	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>  	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
> -		bo->prime_shared_count = 1;
>  
>  	dma_resv_unlock(resv);
>  	return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 1c3e3b608332..5d82120fc160 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>  		break;
>  	}
>  	case AMDGPU_GEM_OP_SET_PLACEMENT:
> -		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> +		if (robj->tbo.base.import_attach &&
> +		    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>  			r = -EINVAL;
>  			amdgpu_bo_unreserve(robj);
>  			break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 96447e1d4c9c..30951b593809 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -871,7 +871,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>  		return -EINVAL;
>  
>  	/* A shared bo cannot be migrated to VRAM */
> -	if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> +	if (bo->tbo.base.import_attach) {
>  		if (domain & AMDGPU_GEM_DOMAIN_GTT)
>  			domain = AMDGPU_GEM_DOMAIN_GTT;
>  		else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index b35962702278..55d7e90eaa72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -98,7 +98,6 @@ struct amdgpu_bo {
>  	struct ttm_buffer_object	tbo;
>  	struct ttm_bo_kmap_obj		kmap;
>  	u64				flags;
> -	unsigned			prime_shared_count;
>  	/* per VM structure for page tables and with virtual addresses */
>  	struct amdgpu_vm_bo_base	*vm_bo;
>  	/* Constant after initialization */
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 5/5] drm/amdgpu: rework dma_resv handling v2
  2021-06-11 14:56     ` Daniel Vetter
@ 2021-06-11 15:10       ` Christian König
  -1 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-06-11 15:10 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: amd-gfx, dri-devel

On 11.06.21 at 16:56, Daniel Vetter wrote:
> On Fri, Jun 11, 2021 at 02:03:01PM +0200, Christian König wrote:
>> Drop the workaround and instead implement a better solution.
>>
>> Basically we are now chaining all submissions using a dma_fence_chain
>> container and adding them as exclusive fence to the dma_resv object.
>>
>> This way other drivers can still sync to the single exclusive fence
>> while amdgpu only sync to fences from different processes.
>>
>> v2: polish kerneldoc a bit
> There is no kerneldoc here, and otherwise this patch looks unchanged from
> the previous round. Hit send a bit too quickly?

Ah, there it went!

It's just too hot here; that was meant to be added to the other patch,
and I was really wondering why it wasn't there!

Going to call it a day, or rather a week, and will look at that again on Monday.

Christian.

> -Daniel
>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 54 +++++++++++++----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
>>   6 files changed, 47 insertions(+), 79 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> index a130e766cbdb..c905a4cfc173 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>>   struct amdgpu_bo_list_entry {
>>   	struct ttm_validate_buffer	tv;
>>   	struct amdgpu_bo_va		*bo_va;
>> +	struct dma_fence_chain		*chain;
>>   	uint32_t			priority;
>>   	struct page			**user_pages;
>>   	bool				user_invalidated;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 325e82621467..f6f3029f958d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -587,6 +587,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>   		goto out;
>>   	}
>>   
>> +	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> +		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> +
>> +		e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> +
>> +		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
>> +			e->chain = dma_fence_chain_alloc();
>> +			if (!e->chain) {
>> +				r = -ENOMEM;
>> +				goto error_validate;
>> +			}
>> +		}
>> +	}
>> +
>>   	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>>   					  &p->bytes_moved_vis_threshold);
>>   	p->bytes_moved = 0;
>> @@ -614,15 +628,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>   	gws = p->bo_list->gws_obj;
>>   	oa = p->bo_list->oa_obj;
>>   
>> -	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> -		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> -
>> -		/* Make sure we use the exclusive slot for shared BOs */
>> -		if (bo->prime_shared_count)
>> -			e->tv.num_shared = 0;
>> -		e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> -	}
>> -
>>   	if (gds) {
>>   		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>>   		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
>> @@ -644,8 +649,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>   	}
>>   
>>   error_validate:
>> -	if (r)
>> +	if (r) {
>> +		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> +			dma_fence_chain_free(e->chain);
>> +			e->chain = NULL;
>> +		}
>>   		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>> +	}
>>   out:
>>   	return r;
>>   }
>> @@ -685,9 +695,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>>   {
>>   	unsigned i;
>>   
>> -	if (error && backoff)
>> +	if (error && backoff) {
>> +		struct amdgpu_bo_list_entry *e;
>> +
>> +		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
>> +			dma_fence_chain_free(e->chain);
>> +			e->chain = NULL;
>> +		}
>> +
>>   		ttm_eu_backoff_reservation(&parser->ticket,
>>   					   &parser->validated);
>> +	}
>>   
>>   	for (i = 0; i < parser->num_post_deps; i++) {
>>   		drm_syncobj_put(parser->post_deps[i].syncobj);
>> @@ -1260,6 +1278,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>   
>>   	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>>   
>> +	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> +		struct dma_resv *resv = e->tv.bo->base.resv;
>> +		struct dma_fence_chain *chain = e->chain;
>> +
>> +		if (!chain)
>> +			continue;
>> +
>> +		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
>> +				     dma_fence_get(p->fence), 1);
>> +
>> +		rcu_assign_pointer(resv->fence_excl, &chain->base);
>> +		e->chain = NULL;
>> +	}
>> +
>>   	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>   	mutex_unlock(&p->adev->notifier_lock);
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> index c3053b83b80c..23219fc3b28c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> @@ -42,48 +42,6 @@
>>   #include <linux/pci-p2pdma.h>
>>   #include <linux/pm_runtime.h>
>>   
>> -static int
>> -__dma_resv_make_exclusive(struct dma_resv *obj)
>> -{
>> -	struct dma_fence **fences;
>> -	unsigned int count;
>> -	int r;
>> -
>> -	if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
>> -		return 0;
>> -
>> -	r = dma_resv_get_fences(obj, NULL, &count, &fences);
>> -	if (r)
>> -		return r;
>> -
>> -	if (count == 0) {
>> -		/* Now that was unexpected. */
>> -	} else if (count == 1) {
>> -		dma_resv_add_excl_fence(obj, fences[0]);
>> -		dma_fence_put(fences[0]);
>> -		kfree(fences);
>> -	} else {
>> -		struct dma_fence_array *array;
>> -
>> -		array = dma_fence_array_create(count, fences,
>> -					       dma_fence_context_alloc(1), 0,
>> -					       false);
>> -		if (!array)
>> -			goto err_fences_put;
>> -
>> -		dma_resv_add_excl_fence(obj, &array->base);
>> -		dma_fence_put(&array->base);
>> -	}
>> -
>> -	return 0;
>> -
>> -err_fences_put:
>> -	while (count--)
>> -		dma_fence_put(fences[count]);
>> -	kfree(fences);
>> -	return -ENOMEM;
>> -}
>> -
>>   /**
>>    * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>>    *
>> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>>   	if (r < 0)
>>   		goto out;
>>   
>> -	r = amdgpu_bo_reserve(bo, false);
>> -	if (unlikely(r != 0))
>> -		goto out;
>> -
>> -	/*
>> -	 * We only create shared fences for internal use, but importers
>> -	 * of the dmabuf rely on exclusive fences for implicitly
>> -	 * tracking write hazards. As any of the current fences may
>> -	 * correspond to a write, we need to convert all existing
>> -	 * fences on the reservation object into a single exclusive
>> -	 * fence.
>> -	 */
>> -	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
>> -	if (r)
>> -		goto out;
>> -
>> -	bo->prime_shared_count++;
>> -	amdgpu_bo_unreserve(bo);
>>   	return 0;
>>   
>>   out:
>> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>>   	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>   
>> -	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
>> -		bo->prime_shared_count--;
>> -
>>   	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>   	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>   }
>> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>>   	bo = gem_to_amdgpu_bo(gobj);
>>   	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>>   	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
>> -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
>> -		bo->prime_shared_count = 1;
>>   
>>   	dma_resv_unlock(resv);
>>   	return gobj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 1c3e3b608332..5d82120fc160 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>   		break;
>>   	}
>>   	case AMDGPU_GEM_OP_SET_PLACEMENT:
>> -		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
>> +		if (robj->tbo.base.import_attach &&
>> +		    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>>   			r = -EINVAL;
>>   			amdgpu_bo_unreserve(robj);
>>   			break;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 96447e1d4c9c..30951b593809 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -871,7 +871,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>>   		return -EINVAL;
>>   
>>   	/* A shared bo cannot be migrated to VRAM */
>> -	if (bo->prime_shared_count || bo->tbo.base.import_attach) {
>> +	if (bo->tbo.base.import_attach) {
>>   		if (domain & AMDGPU_GEM_DOMAIN_GTT)
>>   			domain = AMDGPU_GEM_DOMAIN_GTT;
>>   		else
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index b35962702278..55d7e90eaa72 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -98,7 +98,6 @@ struct amdgpu_bo {
>>   	struct ttm_buffer_object	tbo;
>>   	struct ttm_bo_kmap_obj		kmap;
>>   	u64				flags;
>> -	unsigned			prime_shared_count;
>>   	/* per VM structure for page tables and with virtual addresses */
>>   	struct amdgpu_vm_bo_base	*vm_bo;
>>   	/* Constant after initialization */
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling
  2021-06-11 14:55       ` Daniel Vetter
@ 2021-06-14 17:15         ` Christian König
  -1 siblings, 0 replies; 28+ messages in thread
From: Christian König @ 2021-06-14 17:15 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: amd-gfx, dri-devel

On 11.06.21 at 16:55, Daniel Vetter wrote:
> On Fri, Jun 11, 2021 at 04:53:11PM +0200, Christian König wrote:
>>
>> Am 11.06.21 um 16:47 schrieb Daniel Vetter:
>>> On Fri, Jun 11, 2021 at 02:02:57PM +0200, Christian König wrote:
>>>> As the name implies if testing all fences is requested we
>>>> should indeed test all fences and not skip the exclusive
>>>> one because we see shared ones.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Hm I thought we've had the rule that when both fences exist, then
>>> collectively the shared ones must signal no earlier than the exclusive
>>> one.
>>>
>>> That's at least the contract we've implemented in dma_resv.h. But I've
>>> also found a bunch of drivers who are a lot more yolo on this.
>>>
>>> I think there's a solid case here to just always take all the fences if we
>>> ask for all the shared ones, but if we go that way then I'd say
>>> - clear kerneldoc patch to really hammer this in (currently we're not good
>>>     at all in this regard)
>>> - going through drivers a bit to check for this (I have some of that done
>>>     already in my earlier series, need to respin it and send it out)
>>>
>>> But I'm kinda not seeing why this needs to be in this patch series here.
>> You mentioned that this is a problem in the last patch and if you ask me
>> that's just a bug or at least very inconsistent.
>>
>> See dma_resv_wait_timeout() always waits for all fences, including the
>> exclusive one even if shared ones are present. But dma_resv_test_signaled()
>> ignores the exclusive one if shared ones are present.
> Hm the only one I thought I've mentioned is that dma_buf_poll doesn't use
> dma_fence_get_rcu_safe where I think it should. Different problem. I think
> this is one you spotted.
>
>> The only other driver I could find trying to make use of this is nouveau and
>> I already provided a fix for this as well.
> i915 also does this, and I think I've found a few more.
>
>> I just think that this is the more defensive approach to fix this and have
>> at least the core functions consistent on the handling.
> Oh fully agree, it's just current dma_resv docs aren't the greatest, and
> hacking on semantics without updating the docs isn't great. Especially
> when it's ad-hoc.

Well, when the requirement that shared fences should always signal after
the exclusive fence is not documented anywhere, I would say that it is
naturally allowed to just add any fence to the list of shared fences,
and any code assuming something else is just broken and needs fixing.

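A hypothetical illustration (this is not code from this series, only the
existing API with made-up fence names): nothing would stop a driver from
doing

	/* reservation locking and dma_resv_reserve_shared() elided */
	dma_resv_add_excl_fence(resv, write_fence);
	dma_resv_add_shared_fence(resv, read_fence);

with read_fence signaling long before write_fence, and then a test_all
query has to look at the exclusive slot as well as the shared ones,
exactly as the patch does.
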
Christian.

> -Daniel
>
>> Christian.
>>
>>> -Daniel
>>>
>>>> ---
>>>>    drivers/dma-buf/dma-resv.c | 33 ++++++++++++---------------------
>>>>    1 file changed, 12 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
>>>> index f26c71747d43..c66bfdde9454 100644
>>>> --- a/drivers/dma-buf/dma-resv.c
>>>> +++ b/drivers/dma-buf/dma-resv.c
>>>> @@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
>>>>     */
>>>>    bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
>>>>    {
>>>> -	unsigned int seq, shared_count;
>>>> +	struct dma_fence *fence;
>>>> +	unsigned int seq;
>>>>    	int ret;
>>>>    	rcu_read_lock();
>>>>    retry:
>>>>    	ret = true;
>>>> -	shared_count = 0;
>>>>    	seq = read_seqcount_begin(&obj->seq);
>>>>    	if (test_all) {
>>>>    		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
>>>> -		unsigned int i;
>>>> -
>>>> -		if (fobj)
>>>> -			shared_count = fobj->shared_count;
>>>> +		unsigned int i, shared_count;
>>>> +		shared_count = fobj ? fobj->shared_count : 0;
>>>>    		for (i = 0; i < shared_count; ++i) {
>>>> -			struct dma_fence *fence;
>>>> -
>>>>    			fence = rcu_dereference(fobj->shared[i]);
>>>>    			ret = dma_resv_test_signaled_single(fence);
>>>>    			if (ret < 0)
>>>> @@ -641,24 +637,19 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
>>>>    			else if (!ret)
>>>>    				break;
>>>>    		}
>>>> -
>>>> -		if (read_seqcount_retry(&obj->seq, seq))
>>>> -			goto retry;
>>>>    	}
>>>> -	if (!shared_count) {
>>>> -		struct dma_fence *fence_excl = dma_resv_excl_fence(obj);
>>>> -
>>>> -		if (fence_excl) {
>>>> -			ret = dma_resv_test_signaled_single(fence_excl);
>>>> -			if (ret < 0)
>>>> -				goto retry;
>>>> +	fence = dma_resv_excl_fence(obj);
>>>> +	if (fence) {
>>>> +		ret = dma_resv_test_signaled_single(fence);
>>>> +		if (ret < 0)
>>>> +			goto retry;
>>>> -			if (read_seqcount_retry(&obj->seq, seq))
>>>> -				goto retry;
>>>> -		}
>>>>    	}
>>>> +	if (read_seqcount_retry(&obj->seq, seq))
>>>> +		goto retry;
>>>> +
>>>>    	rcu_read_unlock();
>>>>    	return ret;
>>>>    }
>>>> -- 
>>>> 2.25.1
>>>>


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling
  2021-06-14 17:15         ` Christian König
@ 2021-06-17 16:58           ` Daniel Vetter
  -1 siblings, 0 replies; 28+ messages in thread
From: Daniel Vetter @ 2021-06-17 16:58 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, dri-devel

On Mon, Jun 14, 2021 at 07:15:44PM +0200, Christian König wrote:
> On 11.06.21 at 16:55, Daniel Vetter wrote:
> > On Fri, Jun 11, 2021 at 04:53:11PM +0200, Christian König wrote:
> > > 
> > > Am 11.06.21 um 16:47 schrieb Daniel Vetter:
> > > > On Fri, Jun 11, 2021 at 02:02:57PM +0200, Christian König wrote:
> > > > > As the name implies if testing all fences is requested we
> > > > > should indeed test all fences and not skip the exclusive
> > > > > one because we see shared ones.
> > > > > 
> > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > Hm I thought we've had the rule that when both fences exist, then
> > > > collectively the shared ones must signal no earlier than the exclusive
> > > > one.
> > > > 
> > > > That's at least the contract we've implemented in dma_resv.h. But I've
> > > > also found a bunch of drivers who are a lot more yolo on this.
> > > > 
> > > > I think there's a solid case here to just always take all the fences if we
> > > > ask for all the shared ones, but if we go that way then I'd say
> > > > - clear kerneldoc patch to really hammer this in (currently we're not good
> > > >     at all in this regard)
> > > > - going through drivers a bit to check for this (I have some of that done
> > > >     already in my earlier series, need to respin it and send it out)
> > > > 
> > > > But I'm kinda not seeing why this needs to be in this patch series here.
> > > You mentioned that this is a problem in the last patch and if you ask me
> > > that's just a bug or at least very inconsistent.
> > > 
> > > See dma_resv_wait_timeout() always waits for all fences, including the
> > > exclusive one even if shared ones are present. But dma_resv_test_signaled()
> > > ignores the exclusive one if shared ones are present.
> > Hm the only one I thought I've mentioned is that dma_buf_poll doesn't use
> > dma_fence_get_rcu_safe where I think it should. Different problem. I think
> > this is one you spotted.
> > 
> > > The only other driver I could find trying to make use of this is nouveau and
> > > I already provided a fix for this as well.
> > i915 also does this, and I think I've found a few more.
> > 
> > > I just think that this is the more defensive approach to fix this and have
> > > at least the core functions consistent on the handling.
> > Oh fully agree, it's just current dma_resv docs aren't the greatest, and
> > hacking on semantics without updating the docs isn't great. Especially
> > when it's ad-hoc.
> 
> Well, when the requirement that shared fences should always signal after
> the exclusive fence is not documented anywhere, I would say that it is
> naturally allowed to just add any fence to the list of shared fences,
> and any code assuming something else is just broken and needs fixing.

That's not what I meant. I thought the rule is that the shared fences
_together_ need to signal after the exclusive ones. Not each individual
one.

This means that if you have both exclusive fences and shared fences, and
you want to wait for just the shared fences, then you can ignore the
exclusive ones.

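A minimal sketch of the difference (helper names invented purely for
illustration, this is not the actual dma_resv interface):

	/* collective rule: when shared fences are present they
	 * together signal no earlier than the exclusive one, so
	 * this alone suffices: */
	signaled = all_shared_signaled(obj);

	/* per-fence rule (what your patch assumes): no such ordering,
	 * so the exclusive fence has to be tested too: */
	signaled = all_shared_signaled(obj) && excl_signaled(obj);
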
You have a patch series floating around which "fixes" this, but I think
it's incomplete. And I'm pretty sure it's a change of de facto rules, since
not obeying this breaks a bunch of existing code (as you've noticed).
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > Christian.
> > > 
> > > > -Daniel
> > > > 
> > > > > ---
> > > > >    drivers/dma-buf/dma-resv.c | 33 ++++++++++++---------------------
> > > > >    1 file changed, 12 insertions(+), 21 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> > > > > index f26c71747d43..c66bfdde9454 100644
> > > > > --- a/drivers/dma-buf/dma-resv.c
> > > > > +++ b/drivers/dma-buf/dma-resv.c
> > > > > @@ -615,25 +615,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
> > > > >     */
> > > > >    bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
> > > > >    {
> > > > > -	unsigned int seq, shared_count;
> > > > > +	struct dma_fence *fence;
> > > > > +	unsigned int seq;
> > > > >    	int ret;
> > > > >    	rcu_read_lock();
> > > > >    retry:
> > > > >    	ret = true;
> > > > > -	shared_count = 0;
> > > > >    	seq = read_seqcount_begin(&obj->seq);
> > > > >    	if (test_all) {
> > > > >    		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
> > > > > -		unsigned int i;
> > > > > -
> > > > > -		if (fobj)
> > > > > -			shared_count = fobj->shared_count;
> > > > > +		unsigned int i, shared_count;
> > > > > +		shared_count = fobj ? fobj->shared_count : 0;
> > > > >    		for (i = 0; i < shared_count; ++i) {
> > > > > -			struct dma_fence *fence;
> > > > > -
> > > > >    			fence = rcu_dereference(fobj->shared[i]);
> > > > >    			ret = dma_resv_test_signaled_single(fence);
> > > > >    			if (ret < 0)
> > > > > @@ -641,24 +637,19 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
> > > > >    			else if (!ret)
> > > > >    				break;
> > > > >    		}
> > > > > -
> > > > > -		if (read_seqcount_retry(&obj->seq, seq))
> > > > > -			goto retry;
> > > > >    	}
> > > > > -	if (!shared_count) {
> > > > > -		struct dma_fence *fence_excl = dma_resv_excl_fence(obj);
> > > > > -
> > > > > -		if (fence_excl) {
> > > > > -			ret = dma_resv_test_signaled_single(fence_excl);
> > > > > -			if (ret < 0)
> > > > > -				goto retry;
> > > > > +	fence = dma_resv_excl_fence(obj);
> > > > > +	if (fence) {
> > > > > +		ret = dma_resv_test_signaled_single(fence);
> > > > > +		if (ret < 0)
> > > > > +			goto retry;
> > > > > -			if (read_seqcount_retry(&obj->seq, seq))
> > > > > -				goto retry;
> > > > > -		}
> > > > >    	}
> > > > > +	if (read_seqcount_retry(&obj->seq, seq))
> > > > > +		goto retry;
> > > > > +
> > > > >    	rcu_read_unlock();
> > > > >    	return ret;
> > > > >    }
> > > > > -- 
> > > > > 2.25.1
> > > > > 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2021-06-17 16:58 UTC | newest]

Thread overview: 28+ messages
2021-06-11 12:02 [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling Christian König
2021-06-11 12:02 ` Christian König
2021-06-11 12:02 ` [PATCH 2/5] dma-buf: some dma_fence_chain improvements Christian König
2021-06-11 12:02   ` Christian König
2021-06-11 12:02 ` [PATCH 3/5] dma-buf: add dma_fence_chain_alloc/free v2 Christian König
2021-06-11 12:02   ` Christian König
2021-06-11 14:52   ` Daniel Vetter
2021-06-11 14:52     ` Daniel Vetter
2021-06-11 14:54     ` Christian König
2021-06-11 14:54       ` Christian König
2021-06-11 12:03 ` [PATCH 4/5] drm/amdgpu: unwrap fence chains in the explicit sync fence Christian König
2021-06-11 12:03   ` Christian König
2021-06-11 12:03 ` [PATCH 5/5] drm/amdgpu: rework dma_resv handling v2 Christian König
2021-06-11 12:03   ` Christian König
2021-06-11 14:56   ` Daniel Vetter
2021-06-11 14:56     ` Daniel Vetter
2021-06-11 15:10     ` Christian König
2021-06-11 15:10       ` Christian König
2021-06-11 14:47 ` [PATCH 1/5] dma-buf: fix dma_resv_test_signaled test_all handling Daniel Vetter
2021-06-11 14:47   ` Daniel Vetter
2021-06-11 14:53   ` Christian König
2021-06-11 14:53     ` Christian König
2021-06-11 14:55     ` Daniel Vetter
2021-06-11 14:55       ` Daniel Vetter
2021-06-14 17:15       ` Christian König
2021-06-14 17:15         ` Christian König
2021-06-17 16:58         ` Daniel Vetter
2021-06-17 16:58           ` Daniel Vetter
