* [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
From: Christian König @ 2021-06-14 17:45 UTC
  To: daniel, dri-devel, amd-gfx

Unwrap the explicit fence if it is a dma_fence_chain and
sync to the first fence not matching the owner rules.
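
For background, the unwrapping pattern introduced below looks roughly
like this minimal sketch (error handling elided; it assumes the
<linux/dma-fence-chain.h> API used by the patch, with should_sync()
standing in for the owner-rule check done by amdgpu_sync_test_fence()):

	struct dma_fence *iter;

	/* Walk the chain; each step yields a referenced fence that is
	 * either a chain link or the final, non-chain fence.
	 */
	dma_fence_chain_for_each(iter, dma_resv_excl_fence(resv)) {
		struct dma_fence_chain *chain = to_dma_fence_chain(iter);
		/* For chain links the payload sits behind chain->fence. */
		struct dma_fence *payload = chain ? chain->fence : iter;

		if (should_sync(payload)) {
			/* The patch syncs to iter itself, not the bare
			 * payload, so that all older chain links are
			 * covered too. Drop the iterator's reference
			 * when breaking out early.
			 */
			dma_fence_put(iter);
			break;
		}
	}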

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
 1 file changed, 68 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 1b2ceccaf5b0..862eb3c1c4c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -28,6 +28,8 @@
  *    Christian König <christian.koenig@amd.com>
  */
 
+#include <linux/dma-fence-chain.h>
+
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 #include "amdgpu_amdkfd.h"
@@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
 	return amdgpu_sync_fence(sync, fence);
 }
 
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+				   enum amdgpu_sync_mode mode,
+				   void *owner, struct dma_fence *f)
+{
+	void *fence_owner = amdgpu_sync_get_owner(f);
+
+	/* Always sync to moves, no matter what */
+	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+		return true;
+
+	/* We only want to trigger KFD eviction fences on
+	 * evict or move jobs. Skip KFD fences otherwise.
+	 */
+	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+		return false;
+
+	/* Never sync to VM updates either. */
+	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+		return false;
+
+	/* Ignore fences depending on the sync mode */
+	switch (mode) {
+	case AMDGPU_SYNC_ALWAYS:
+		return true;
+
+	case AMDGPU_SYNC_NE_OWNER:
+		if (amdgpu_sync_same_dev(adev, f) &&
+		    fence_owner == owner)
+			return false;
+		break;
+
+	case AMDGPU_SYNC_EQ_OWNER:
+		if (amdgpu_sync_same_dev(adev, f) &&
+		    fence_owner != owner)
+			return false;
+		break;
+
+	case AMDGPU_SYNC_EXPLICIT:
+		return false;
+	}
+
+	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+	     "Adding eviction fence to sync obj");
+	return true;
+}
+
 /**
  * amdgpu_sync_resv - sync to a reservation object
  *
@@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 
 	/* always sync to the exclusive fence */
 	f = dma_resv_excl_fence(resv);
-	r = amdgpu_sync_fence(sync, f);
+	dma_fence_chain_for_each(f, f) {
+		struct dma_fence_chain *chain = to_dma_fence_chain(f);
+
+		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
+					   chain->fence : f)) {
+			r = amdgpu_sync_fence(sync, f);
+			dma_fence_put(f);
+			if (r)
+				return r;
+			break;
+		}
+	}
 
 	flist = dma_resv_shared_list(resv);
-	if (!flist || r)
-		return r;
+	if (!flist)
+		return 0;
 
 	for (i = 0; i < flist->shared_count; ++i) {
-		void *fence_owner;
-
 		f = rcu_dereference_protected(flist->shared[i],
 					      dma_resv_held(resv));
 
-		fence_owner = amdgpu_sync_get_owner(f);
-
-		/* Always sync to moves, no matter what */
-		if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
+		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
 			r = amdgpu_sync_fence(sync, f);
 			if (r)
-				break;
-		}
-
-		/* We only want to trigger KFD eviction fences on
-		 * evict or move jobs. Skip KFD fences otherwise.
-		 */
-		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
-		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
-			continue;
-
-		/* Never sync to VM updates either. */
-		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
-		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
-			continue;
-
-		/* Ignore fences depending on the sync mode */
-		switch (mode) {
-		case AMDGPU_SYNC_ALWAYS:
-			break;
-
-		case AMDGPU_SYNC_NE_OWNER:
-			if (amdgpu_sync_same_dev(adev, f) &&
-			    fence_owner == owner)
-				continue;
-			break;
-
-		case AMDGPU_SYNC_EQ_OWNER:
-			if (amdgpu_sync_same_dev(adev, f) &&
-			    fence_owner != owner)
-				continue;
-			break;
-
-		case AMDGPU_SYNC_EXPLICIT:
-			continue;
+				return r;
 		}
-
-		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
-		     "Adding eviction fence to sync obj");
-		r = amdgpu_sync_fence(sync, f);
-		if (r)
-			break;
 	}
-	return r;
+	return 0;
 }
 
 /**
-- 
2.25.1


* [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
From: Christian König @ 2021-06-14 17:45 UTC
  To: daniel, dri-devel, amd-gfx

Drop the workaround and instead implement a better solution.

Basically we are now chaining all submissions using a dma_fence_chain
container and adding them as the exclusive fence to the dma_resv object.

This way other drivers can still sync to the single exclusive fence
while amdgpu only syncs to fences from different processes.

v3: add the shared fence first before the exclusive one
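
The core of the scheme, in the order the v3 note requires, condensed
from the amdgpu_cs_submit() hunk below ("chain" was pre-allocated with
dma_fence_chain_alloc() and the reservation object is locked):

	/* New chain link: previous exclusive fence + this submission. */
	dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
			     dma_fence_get(p->fence), 1);

	/* Publish the submission as a shared fence first, so that no
	 * shared fence can ever signal before the exclusive chain that
	 * contains it.
	 */
	dma_resv_add_shared_fence(resv, p->fence);
	rcu_assign_pointer(resv->fence_excl, &chain->base);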

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 62 ++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
 6 files changed, 55 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a130e766cbdb..c905a4cfc173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
 struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
+	struct dma_fence_chain		*chain;
 	uint32_t			priority;
 	struct page			**user_pages;
 	bool				user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9ce649a1a8d3..25655414e9c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto out;
 	}
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+			e->chain = dma_fence_chain_alloc();
+			if (!e->chain) {
+				r = -ENOMEM;
+				goto error_validate;
+			}
+		}
+	}
+
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
 					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
@@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-		/* Make sure we use the exclusive slot for shared BOs */
-		if (bo->prime_shared_count)
-			e->tv.num_shared = 0;
-		e->bo_va = amdgpu_vm_bo_find(vm, bo);
-	}
-
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 error_validate:
-	if (r)
+	if (r) {
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			dma_fence_chain_free(e->chain);
+			e->chain = NULL;
+		}
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+	}
 out:
 	return r;
 }
@@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 {
 	unsigned i;
 
-	if (error && backoff)
+	if (error && backoff) {
+		struct amdgpu_bo_list_entry *e;
+
+		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+			dma_fence_chain_free(e->chain);
+			e->chain = NULL;
+		}
+
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
+	}
 
 	for (i = 0; i < parser->num_post_deps; i++) {
 		drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_resv *resv = e->tv.bo->base.resv;
+		struct dma_fence_chain *chain = e->chain;
+
+		if (!chain)
+			continue;
+
+		/*
+		 * Work around dma_resv shortcomings by wrapping up the
+		 * submission in a dma_fence_chain and add it as exclusive
+		 * fence, but first add the submission as shared fence to make
+		 * sure that shared fences never signal before the exclusive
+		 * one.
+		 */
+		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+				     dma_fence_get(p->fence), 1);
+
+		dma_resv_add_shared_fence(resv, p->fence);
+		rcu_assign_pointer(resv->fence_excl, &chain->base);
+		e->chain = NULL;
+	}
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	mutex_unlock(&p->adev->notifier_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c3053b83b80c..23219fc3b28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,48 +42,6 @@
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
 
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
-	struct dma_fence **fences;
-	unsigned int count;
-	int r;
-
-	if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
-		return 0;
-
-	r = dma_resv_get_fences(obj, NULL, &count, &fences);
-	if (r)
-		return r;
-
-	if (count == 0) {
-		/* Now that was unexpected. */
-	} else if (count == 1) {
-		dma_resv_add_excl_fence(obj, fences[0]);
-		dma_fence_put(fences[0]);
-		kfree(fences);
-	} else {
-		struct dma_fence_array *array;
-
-		array = dma_fence_array_create(count, fences,
-					       dma_fence_context_alloc(1), 0,
-					       false);
-		if (!array)
-			goto err_fences_put;
-
-		dma_resv_add_excl_fence(obj, &array->base);
-		dma_fence_put(&array->base);
-	}
-
-	return 0;
-
-err_fences_put:
-	while (count--)
-		dma_fence_put(fences[count]);
-	kfree(fences);
-	return -ENOMEM;
-}
-
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
  *
@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 	if (r < 0)
 		goto out;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (unlikely(r != 0))
-		goto out;
-
-	/*
-	 * We only create shared fences for internal use, but importers
-	 * of the dmabuf rely on exclusive fences for implicitly
-	 * tracking write hazards. As any of the current fences may
-	 * correspond to a write, we need to convert all existing
-	 * fences on the reservation object into a single exclusive
-	 * fence.
-	 */
-	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
-	if (r)
-		goto out;
-
-	bo->prime_shared_count++;
-	amdgpu_bo_unreserve(bo);
 	return 0;
 
 out:
@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-		bo->prime_shared_count--;
-
 	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
 }
@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 	bo = gem_to_amdgpu_bo(gobj);
 	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (dma_buf->ops != &amdgpu_dmabuf_ops)
-		bo->prime_shared_count = 1;
 
 	dma_resv_unlock(resv);
 	return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9cf4beaf646c..0780e8d18992 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case AMDGPU_GEM_OP_SET_PLACEMENT:
-		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+		if (robj->tbo.base.import_attach &&
+		    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
 			r = -EINVAL;
 			amdgpu_bo_unreserve(robj);
 			break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b7a2070d90af..d13490975ac3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		return -EINVAL;
 
 	/* A shared bo cannot be migrated to VRAM */
-	if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+	if (bo->tbo.base.import_attach) {
 		if (domain & AMDGPU_GEM_DOMAIN_GTT)
 			domain = AMDGPU_GEM_DOMAIN_GTT;
 		else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 126df03a7066..c03dfd298f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -99,7 +99,6 @@ struct amdgpu_bo {
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
 	u64				flags;
-	unsigned			prime_shared_count;
 	/* per VM structure for page tables and with virtual addresses */
 	struct amdgpu_vm_bo_base	*vm_bo;
 	/* Constant after initialization */
-- 
2.25.1


* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
From: Christian König @ 2021-06-17  7:44 UTC
  To: dri-devel, amd-gfx, Alex Deucher

Alex, do you want to review those so that we can close the ticket?

Thanks,
Christian.

On 14.06.21 at 19:45, Christian König wrote:
> Unwrap the explicit fence if it is a dma_fence_chain and
> sync to the first fence not matching the owner rules.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
>   1 file changed, 68 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 1b2ceccaf5b0..862eb3c1c4c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -28,6 +28,8 @@
>    *    Christian König <christian.koenig@amd.com>
>    */
>   
> +#include <linux/dma-fence-chain.h>
> +
>   #include "amdgpu.h"
>   #include "amdgpu_trace.h"
>   #include "amdgpu_amdkfd.h"
> @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
>   	return amdgpu_sync_fence(sync, fence);
>   }
>   
> +/* Determine based on the owner and mode if we should sync to a fence or not */
> +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> +				   enum amdgpu_sync_mode mode,
> +				   void *owner, struct dma_fence *f)
> +{
> +	void *fence_owner = amdgpu_sync_get_owner(f);
> +
> +	/* Always sync to moves, no matter what */
> +	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> +		return true;
> +
> +	/* We only want to trigger KFD eviction fences on
> +	 * evict or move jobs. Skip KFD fences otherwise.
> +	 */
> +	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> +	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> +		return false;
> +
> +	/* Never sync to VM updates either. */
> +	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> +	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> +		return false;
> +
> +	/* Ignore fences depending on the sync mode */
> +	switch (mode) {
> +	case AMDGPU_SYNC_ALWAYS:
> +		return true;
> +
> +	case AMDGPU_SYNC_NE_OWNER:
> +		if (amdgpu_sync_same_dev(adev, f) &&
> +		    fence_owner == owner)
> +			return false;
> +		break;
> +
> +	case AMDGPU_SYNC_EQ_OWNER:
> +		if (amdgpu_sync_same_dev(adev, f) &&
> +		    fence_owner != owner)
> +			return false;
> +		break;
> +
> +	case AMDGPU_SYNC_EXPLICIT:
> +		return false;
> +	}
> +
> +	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> +	     "Adding eviction fence to sync obj");
> +	return true;
> +}
> +
>   /**
>    * amdgpu_sync_resv - sync to a reservation object
>    *
> @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>   
>   	/* always sync to the exclusive fence */
>   	f = dma_resv_excl_fence(resv);
> -	r = amdgpu_sync_fence(sync, f);
> +	dma_fence_chain_for_each(f, f) {
> +		struct dma_fence_chain *chain = to_dma_fence_chain(f);
> +
> +		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> +					   chain->fence : f)) {
> +			r = amdgpu_sync_fence(sync, f);
> +			dma_fence_put(f);
> +			if (r)
> +				return r;
> +			break;
> +		}
> +	}
>   
>   	flist = dma_resv_shared_list(resv);
> -	if (!flist || r)
> -		return r;
> +	if (!flist)
> +		return 0;
>   
>   	for (i = 0; i < flist->shared_count; ++i) {
> -		void *fence_owner;
> -
>   		f = rcu_dereference_protected(flist->shared[i],
>   					      dma_resv_held(resv));
>   
> -		fence_owner = amdgpu_sync_get_owner(f);
> -
> -		/* Always sync to moves, no matter what */
> -		if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> +		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
>   			r = amdgpu_sync_fence(sync, f);
>   			if (r)
> -				break;
> -		}
> -
> -		/* We only want to trigger KFD eviction fences on
> -		 * evict or move jobs. Skip KFD fences otherwise.
> -		 */
> -		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> -		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> -			continue;
> -
> -		/* Never sync to VM updates either. */
> -		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> -		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> -			continue;
> -
> -		/* Ignore fences depending on the sync mode */
> -		switch (mode) {
> -		case AMDGPU_SYNC_ALWAYS:
> -			break;
> -
> -		case AMDGPU_SYNC_NE_OWNER:
> -			if (amdgpu_sync_same_dev(adev, f) &&
> -			    fence_owner == owner)
> -				continue;
> -			break;
> -
> -		case AMDGPU_SYNC_EQ_OWNER:
> -			if (amdgpu_sync_same_dev(adev, f) &&
> -			    fence_owner != owner)
> -				continue;
> -			break;
> -
> -		case AMDGPU_SYNC_EXPLICIT:
> -			continue;
> +				return r;
>   		}
> -
> -		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> -		     "Adding eviction fence to sync obj");
> -		r = amdgpu_sync_fence(sync, f);
> -		if (r)
> -			break;
>   	}
> -	return r;
> +	return 0;
>   }
>   
>   /**


* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
From: Daniel Vetter @ 2021-06-17 17:30 UTC
  To: Christian König; +Cc: Alex Deucher, amd-gfx, dri-devel

On Thu, Jun 17, 2021 at 09:44:25AM +0200, Christian König wrote:
> Alex do want to review those so that we can close the ticket?

Maybe I'm behind on mails, but the 2nd patch still has the issues I
think I'm seeing ...
-Daniel

> 
> Thanks,
> Christian.
> 
> On 14.06.21 at 19:45, Christian König wrote:
> > Unwrap the explicit fence if it is a dma_fence_chain and
> > sync to the first fence not matching the owner rules.
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> >   1 file changed, 68 insertions(+), 50 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > index 1b2ceccaf5b0..862eb3c1c4c5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > @@ -28,6 +28,8 @@
> >    *    Christian König <christian.koenig@amd.com>
> >    */
> > +#include <linux/dma-fence-chain.h>
> > +
> >   #include "amdgpu.h"
> >   #include "amdgpu_trace.h"
> >   #include "amdgpu_amdkfd.h"
> > @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> >   	return amdgpu_sync_fence(sync, fence);
> >   }
> > +/* Determine based on the owner and mode if we should sync to a fence or not */
> > +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> > +				   enum amdgpu_sync_mode mode,
> > +				   void *owner, struct dma_fence *f)
> > +{
> > +	void *fence_owner = amdgpu_sync_get_owner(f);
> > +
> > +	/* Always sync to moves, no matter what */
> > +	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> > +		return true;
> > +
> > +	/* We only want to trigger KFD eviction fences on
> > +	 * evict or move jobs. Skip KFD fences otherwise.
> > +	 */
> > +	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > +	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > +		return false;
> > +
> > +	/* Never sync to VM updates either. */
> > +	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > +	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > +		return false;
> > +
> > +	/* Ignore fences depending on the sync mode */
> > +	switch (mode) {
> > +	case AMDGPU_SYNC_ALWAYS:
> > +		return true;
> > +
> > +	case AMDGPU_SYNC_NE_OWNER:
> > +		if (amdgpu_sync_same_dev(adev, f) &&
> > +		    fence_owner == owner)
> > +			return false;
> > +		break;
> > +
> > +	case AMDGPU_SYNC_EQ_OWNER:
> > +		if (amdgpu_sync_same_dev(adev, f) &&
> > +		    fence_owner != owner)
> > +			return false;
> > +		break;
> > +
> > +	case AMDGPU_SYNC_EXPLICIT:
> > +		return false;
> > +	}
> > +
> > +	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > +	     "Adding eviction fence to sync obj");
> > +	return true;
> > +}
> > +
> >   /**
> >    * amdgpu_sync_resv - sync to a reservation object
> >    *
> > @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> >   	/* always sync to the exclusive fence */
> >   	f = dma_resv_excl_fence(resv);
> > -	r = amdgpu_sync_fence(sync, f);
> > +	dma_fence_chain_for_each(f, f) {
> > +		struct dma_fence_chain *chain = to_dma_fence_chain(f);
> > +
> > +		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> > +					   chain->fence : f)) {
> > +			r = amdgpu_sync_fence(sync, f);
> > +			dma_fence_put(f);
> > +			if (r)
> > +				return r;
> > +			break;
> > +		}
> > +	}
> >   	flist = dma_resv_shared_list(resv);
> > -	if (!flist || r)
> > -		return r;
> > +	if (!flist)
> > +		return 0;
> >   	for (i = 0; i < flist->shared_count; ++i) {
> > -		void *fence_owner;
> > -
> >   		f = rcu_dereference_protected(flist->shared[i],
> >   					      dma_resv_held(resv));
> > -		fence_owner = amdgpu_sync_get_owner(f);
> > -
> > -		/* Always sync to moves, no matter what */
> > -		if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> > +		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> >   			r = amdgpu_sync_fence(sync, f);
> >   			if (r)
> > -				break;
> > -		}
> > -
> > -		/* We only want to trigger KFD eviction fences on
> > -		 * evict or move jobs. Skip KFD fences otherwise.
> > -		 */
> > -		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > -		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > -			continue;
> > -
> > -		/* Never sync to VM updates either. */
> > -		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > -		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > -			continue;
> > -
> > -		/* Ignore fences depending on the sync mode */
> > -		switch (mode) {
> > -		case AMDGPU_SYNC_ALWAYS:
> > -			break;
> > -
> > -		case AMDGPU_SYNC_NE_OWNER:
> > -			if (amdgpu_sync_same_dev(adev, f) &&
> > -			    fence_owner == owner)
> > -				continue;
> > -			break;
> > -
> > -		case AMDGPU_SYNC_EQ_OWNER:
> > -			if (amdgpu_sync_same_dev(adev, f) &&
> > -			    fence_owner != owner)
> > -				continue;
> > -			break;
> > -
> > -		case AMDGPU_SYNC_EXPLICIT:
> > -			continue;
> > +				return r;
> >   		}
> > -
> > -		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > -		     "Adding eviction fence to sync obj");
> > -		r = amdgpu_sync_fence(sync, f);
> > -		if (r)
> > -			break;
> >   	}
> > -	return r;
> > +	return 0;
> >   }
> >   /**
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
From: Daniel Vetter @ 2021-06-17 19:37 UTC
  To: Christian König; +Cc: amd-gfx, dri-devel

On Mon, Jun 14, 2021 at 07:45:36PM +0200, Christian König wrote:
> Drop the workaround and instead implement a better solution.
> 
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as the exclusive fence to the dma_resv object.
> 
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only syncs to fences from different processes.
> 
> v3: add the shared fence first before the exclusive one
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 62 ++++++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
>  6 files changed, 55 insertions(+), 79 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>  struct amdgpu_bo_list_entry {
>  	struct ttm_validate_buffer	tv;
>  	struct amdgpu_bo_va		*bo_va;
> +	struct dma_fence_chain		*chain;
>  	uint32_t			priority;
>  	struct page			**user_pages;
>  	bool				user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9ce649a1a8d3..25655414e9c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  		goto out;
>  	}
>  
> +	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> +		e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> +		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> +			e->chain = dma_fence_chain_alloc();
> +			if (!e->chain) {
> +				r = -ENOMEM;
> +				goto error_validate;
> +			}
> +		}
> +	}
> +
>  	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>  					  &p->bytes_moved_vis_threshold);
>  	p->bytes_moved = 0;
> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  	gws = p->bo_list->gws_obj;
>  	oa = p->bo_list->oa_obj;
>  
> -	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> -		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> -		/* Make sure we use the exclusive slot for shared BOs */
> -		if (bo->prime_shared_count)
> -			e->tv.num_shared = 0;
> -		e->bo_va = amdgpu_vm_bo_find(vm, bo);
> -	}
> -
>  	if (gds) {
>  		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>  		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  	}
>  
>  error_validate:
> -	if (r)
> +	if (r) {
> +		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +			dma_fence_chain_free(e->chain);
> +			e->chain = NULL;
> +		}
>  		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> +	}
>  out:
>  	return r;
>  }
> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>  {
>  	unsigned i;
>  
> -	if (error && backoff)
> +	if (error && backoff) {
> +		struct amdgpu_bo_list_entry *e;
> +
> +		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> +			dma_fence_chain_free(e->chain);
> +			e->chain = NULL;
> +		}
> +
>  		ttm_eu_backoff_reservation(&parser->ticket,
>  					   &parser->validated);
> +	}
>  
>  	for (i = 0; i < parser->num_post_deps; i++) {
>  		drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  
>  	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>  
> +	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +		struct dma_resv *resv = e->tv.bo->base.resv;
> +		struct dma_fence_chain *chain = e->chain;
> +
> +		if (!chain)
> +			continue;
> +
> +		/*
> +		 * Work around dma_resv shortcomings by wrapping up the
> +		 * submission in a dma_fence_chain and add it as exclusive
> +		 * fence, but first add the submission as shared fence to make
> +		 * sure that shared fences never signal before the exclusive
> +		 * one.
> +		 */
> +		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> +				     dma_fence_get(p->fence), 1);
> +
> +		dma_resv_add_shared_fence(resv, p->fence);
> +		rcu_assign_pointer(resv->fence_excl, &chain->base);

Ok, I should have looked more carefully; +/- memory barriers, this looks
correct.

So my idea behind the dma_resv_add_shared_exclusive_fence helper was that
it would give us more flexibility for changing the internals. I do like
your idea from a few weeks back of just storing exclusive fences as
shared ones, but with a marker, and having the interface functions pick
the right set of fences depending on what the caller wants. But for that
we'd need to push a lot more abstraction of the internals into dma_resv
and make sure everyone is using it.

Because looking at drivers, there is indeed both the concept of a truly
exclusive fence and the "it's actually a shared fence, but stuffed into
the exclusive fence slot for implicit sync" variety. And from what I can
see, everyone gets it wrong, in lots of different ways.

Hence the wrapper for this concept, and then making sure we roll it out
to all the drivers. I think all the ones with a NO_IMPLICIT flag (or
equivalent; i915 and msm have it the other way round, iirc) would need
this to not break the dma_resv contract too badly.

But also I guess we can bikeshed this later on even more.
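
To make that concrete, such a helper could be little more than the
open-coded sequence from this patch, factored out (hypothetical sketch;
the name and semantics are only what is being discussed here, not an
existing dma_resv interface):

	static void dma_resv_add_shared_exclusive_fence(struct dma_resv *obj,
							struct dma_fence_chain *chain,
							struct dma_fence *fence)
	{
		dma_resv_assert_held(obj);

		/* Chain the new fence behind the current exclusive one. */
		dma_fence_chain_init(chain, dma_resv_excl_fence(obj),
				     dma_fence_get(fence), 1);

		/* Shared first, so it can never signal before the
		 * exclusive slot that wraps it.
		 */
		dma_resv_add_shared_fence(obj, fence);
		rcu_assign_pointer(obj->fence_excl, &chain->base);
	}

amdgpu_cs_submit() above would then call this once per BO instead of
open-coding the three steps, and other drivers could share the exact
same ordering guarantees.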

On this: Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>


> +		e->chain = NULL;
> +	}
> +
>  	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>  	mutex_unlock(&p->adev->notifier_lock);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
>  #include <linux/pci-p2pdma.h>
>  #include <linux/pm_runtime.h>
>  
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> -	struct dma_fence **fences;
> -	unsigned int count;
> -	int r;
> -
> -	if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> -		return 0;
> -
> -	r = dma_resv_get_fences(obj, NULL, &count, &fences);
> -	if (r)
> -		return r;
> -
> -	if (count == 0) {
> -		/* Now that was unexpected. */
> -	} else if (count == 1) {
> -		dma_resv_add_excl_fence(obj, fences[0]);
> -		dma_fence_put(fences[0]);
> -		kfree(fences);
> -	} else {
> -		struct dma_fence_array *array;
> -
> -		array = dma_fence_array_create(count, fences,
> -					       dma_fence_context_alloc(1), 0,
> -					       false);
> -		if (!array)
> -			goto err_fences_put;
> -
> -		dma_resv_add_excl_fence(obj, &array->base);
> -		dma_fence_put(&array->base);
> -	}
> -
> -	return 0;
> -
> -err_fences_put:
> -	while (count--)
> -		dma_fence_put(fences[count]);
> -	kfree(fences);
> -	return -ENOMEM;
> -}
> -
>  /**
>   * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>   *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>  	if (r < 0)
>  		goto out;
>  
> -	r = amdgpu_bo_reserve(bo, false);
> -	if (unlikely(r != 0))
> -		goto out;
> -
> -	/*
> -	 * We only create shared fences for internal use, but importers
> -	 * of the dmabuf rely on exclusive fences for implicitly
> -	 * tracking write hazards. As any of the current fences may
> -	 * correspond to a write, we need to convert all existing
> -	 * fences on the reservation object into a single exclusive
> -	 * fence.
> -	 */
> -	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> -	if (r)
> -		goto out;
> -
> -	bo->prime_shared_count++;
> -	amdgpu_bo_unreserve(bo);
>  	return 0;
>  
>  out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>  
> -	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> -		bo->prime_shared_count--;
> -
>  	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>  	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>  }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>  	bo = gem_to_amdgpu_bo(gobj);
>  	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>  	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> -	if (dma_buf->ops != &amdgpu_dmabuf_ops)
> -		bo->prime_shared_count = 1;
>  
>  	dma_resv_unlock(resv);
>  	return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cf4beaf646c..0780e8d18992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>  		break;
>  	}
>  	case AMDGPU_GEM_OP_SET_PLACEMENT:
> -		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> +		if (robj->tbo.base.import_attach &&
> +		    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>  			r = -EINVAL;
>  			amdgpu_bo_unreserve(robj);
>  			break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b7a2070d90af..d13490975ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>  		return -EINVAL;
>  
>  	/* A shared bo cannot be migrated to VRAM */
> -	if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> +	if (bo->tbo.base.import_attach) {
>  		if (domain & AMDGPU_GEM_DOMAIN_GTT)
>  			domain = AMDGPU_GEM_DOMAIN_GTT;
>  		else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 126df03a7066..c03dfd298f45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -99,7 +99,6 @@ struct amdgpu_bo {
>  	struct ttm_buffer_object	tbo;
>  	struct ttm_bo_kmap_obj		kmap;
>  	u64				flags;
> -	unsigned			prime_shared_count;
>  	/* per VM structure for page tables and with virtual addresses */
>  	struct amdgpu_vm_bo_base	*vm_bo;
>  	/* Constant after initialization */
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
  2021-06-17 17:30     ` Daniel Vetter
@ 2021-06-17 19:38       ` Daniel Vetter
  -1 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 19:38 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, amd-gfx, dri-devel

On Thu, Jun 17, 2021 at 07:30:24PM +0200, Daniel Vetter wrote:
> On Thu, Jun 17, 2021 at 09:44:25AM +0200, Christian König wrote:
> > Alex, do you want to review those so that we can close the ticket?
> 
> Maybe I'm behind on mails, but 2nd patch still has the issues I think I'm
> seeing ...

OK, with temperatures getting colder towards the night, the 2nd patch looks
much better now :-) I replied there.
-Daniel

> -Daniel
> 
> > 
> > Thanks,
> > Christian.
> > 
> > Am 14.06.21 um 19:45 schrieb Christian König:
> > > Unwrap the explicit fence if it is a dma_fence_chain and
> > > sync to the first fence not matching the owner rules.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> > >   1 file changed, 68 insertions(+), 50 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > index 1b2ceccaf5b0..862eb3c1c4c5 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > @@ -28,6 +28,8 @@
> > >    *    Christian König <christian.koenig@amd.com>
> > >    */
> > > +#include <linux/dma-fence-chain.h>
> > > +
> > >   #include "amdgpu.h"
> > >   #include "amdgpu_trace.h"
> > >   #include "amdgpu_amdkfd.h"
> > > @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> > >   	return amdgpu_sync_fence(sync, fence);
> > >   }
> > > +/* Determine based on the owner and mode if we should sync to a fence or not */
> > > +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> > > +				   enum amdgpu_sync_mode mode,
> > > +				   void *owner, struct dma_fence *f)
> > > +{
> > > +	void *fence_owner = amdgpu_sync_get_owner(f);
> > > +
> > > +	/* Always sync to moves, no matter what */
> > > +	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> > > +		return true;
> > > +
> > > +	/* We only want to trigger KFD eviction fences on
> > > +	 * evict or move jobs. Skip KFD fences otherwise.
> > > +	 */
> > > +	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > > +	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > +		return false;
> > > +
> > > +	/* Never sync to VM updates either. */
> > > +	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > > +	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > +		return false;
> > > +
> > > +	/* Ignore fences depending on the sync mode */
> > > +	switch (mode) {
> > > +	case AMDGPU_SYNC_ALWAYS:
> > > +		return true;
> > > +
> > > +	case AMDGPU_SYNC_NE_OWNER:
> > > +		if (amdgpu_sync_same_dev(adev, f) &&
> > > +		    fence_owner == owner)
> > > +			return false;
> > > +		break;
> > > +
> > > +	case AMDGPU_SYNC_EQ_OWNER:
> > > +		if (amdgpu_sync_same_dev(adev, f) &&
> > > +		    fence_owner != owner)
> > > +			return false;
> > > +		break;
> > > +
> > > +	case AMDGPU_SYNC_EXPLICIT:
> > > +		return false;
> > > +	}
> > > +
> > > +	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > > +	     "Adding eviction fence to sync obj");
> > > +	return true;
> > > +}
> > > +
> > >   /**
> > >    * amdgpu_sync_resv - sync to a reservation object
> > >    *
> > > @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> > >   	/* always sync to the exclusive fence */
> > >   	f = dma_resv_excl_fence(resv);
> > > -	r = amdgpu_sync_fence(sync, f);
> > > +	dma_fence_chain_for_each(f, f) {
> > > +		struct dma_fence_chain *chain = to_dma_fence_chain(f);
> > > +
> > > +		if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> > > +					   chain->fence : f)) {
> > > +			r = amdgpu_sync_fence(sync, f);
> > > +			dma_fence_put(f);
> > > +			if (r)
> > > +				return r;
> > > +			break;
> > > +		}
> > > +	}
> > >   	flist = dma_resv_shared_list(resv);
> > > -	if (!flist || r)
> > > -		return r;
> > > +	if (!flist)
> > > +		return 0;
> > >   	for (i = 0; i < flist->shared_count; ++i) {
> > > -		void *fence_owner;
> > > -
> > >   		f = rcu_dereference_protected(flist->shared[i],
> > >   					      dma_resv_held(resv));
> > > -		fence_owner = amdgpu_sync_get_owner(f);
> > > -
> > > -		/* Always sync to moves, no matter what */
> > > -		if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> > > +		if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> > >   			r = amdgpu_sync_fence(sync, f);
> > >   			if (r)
> > > -				break;
> > > -		}
> > > -
> > > -		/* We only want to trigger KFD eviction fences on
> > > -		 * evict or move jobs. Skip KFD fences otherwise.
> > > -		 */
> > > -		if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > > -		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > -			continue;
> > > -
> > > -		/* Never sync to VM updates either. */
> > > -		if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > > -		    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > -			continue;
> > > -
> > > -		/* Ignore fences depending on the sync mode */
> > > -		switch (mode) {
> > > -		case AMDGPU_SYNC_ALWAYS:
> > > -			break;
> > > -
> > > -		case AMDGPU_SYNC_NE_OWNER:
> > > -			if (amdgpu_sync_same_dev(adev, f) &&
> > > -			    fence_owner == owner)
> > > -				continue;
> > > -			break;
> > > -
> > > -		case AMDGPU_SYNC_EQ_OWNER:
> > > -			if (amdgpu_sync_same_dev(adev, f) &&
> > > -			    fence_owner != owner)
> > > -				continue;
> > > -			break;
> > > -
> > > -		case AMDGPU_SYNC_EXPLICIT:
> > > -			continue;
> > > +				return r;
> > >   		}
> > > -
> > > -		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > > -		     "Adding eviction fence to sync obj");
> > > -		r = amdgpu_sync_fence(sync, f);
> > > -		if (r)
> > > -			break;
> > >   	}
> > > -	return r;
> > > +	return 0;
> > >   }
> > >   /**
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
  2021-06-14 17:45   ` Christian König
@ 2021-06-17 21:09     ` Alex Deucher
  -1 siblings, 0 replies; 16+ messages in thread
From: Alex Deucher @ 2021-06-17 21:09 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list, Mailing list - DRI developers

On Mon, Jun 14, 2021 at 1:45 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Drop the workaround and instead implement a better solution.
>
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as the exclusive fence to the dma_resv object.
>
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only syncs to fences from different processes.
>
> v3: add the shared fence first before the exclusive one
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Series is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 62 ++++++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
>  6 files changed, 55 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>  struct amdgpu_bo_list_entry {
>         struct ttm_validate_buffer      tv;
>         struct amdgpu_bo_va             *bo_va;
> +       struct dma_fence_chain          *chain;
>         uint32_t                        priority;
>         struct page                     **user_pages;
>         bool                            user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9ce649a1a8d3..25655414e9c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>                 goto out;
>         }
>
> +       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> +               e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> +               if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> +                       e->chain = dma_fence_chain_alloc();
> +                       if (!e->chain) {
> +                               r = -ENOMEM;
> +                               goto error_validate;
> +                       }
> +               }
> +       }
> +
>         amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>                                           &p->bytes_moved_vis_threshold);
>         p->bytes_moved = 0;
> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>         gws = p->bo_list->gws_obj;
>         oa = p->bo_list->oa_obj;
>
> -       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> -               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> -               /* Make sure we use the exclusive slot for shared BOs */
> -               if (bo->prime_shared_count)
> -                       e->tv.num_shared = 0;
> -               e->bo_va = amdgpu_vm_bo_find(vm, bo);
> -       }
> -
>         if (gds) {
>                 p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>                 p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>         }
>
>  error_validate:
> -       if (r)
> +       if (r) {
> +               amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +                       dma_fence_chain_free(e->chain);
> +                       e->chain = NULL;
> +               }
>                 ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> +       }
>  out:
>         return r;
>  }
> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>  {
>         unsigned i;
>
> -       if (error && backoff)
> +       if (error && backoff) {
> +               struct amdgpu_bo_list_entry *e;
> +
> +               amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> +                       dma_fence_chain_free(e->chain);
> +                       e->chain = NULL;
> +               }
> +
>                 ttm_eu_backoff_reservation(&parser->ticket,
>                                            &parser->validated);
> +       }
>
>         for (i = 0; i < parser->num_post_deps; i++) {
>                 drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
>         amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> +       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> +               struct dma_resv *resv = e->tv.bo->base.resv;
> +               struct dma_fence_chain *chain = e->chain;
> +
> +               if (!chain)
> +                       continue;
> +
> +               /*
> +                * Work around dma_resv shortcomings by wrapping up the
> +                * submission in a dma_fence_chain and adding it as the
> +                * exclusive fence, but first add the submission as a shared
> +                * fence to make sure that shared fences never signal before
> +                * the exclusive one.
> +                */
> +               dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> +                                    dma_fence_get(p->fence), 1);
> +
> +               dma_resv_add_shared_fence(resv, p->fence);
> +               rcu_assign_pointer(resv->fence_excl, &chain->base);
> +               e->chain = NULL;
> +       }
> +
>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>         mutex_unlock(&p->adev->notifier_lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
>  #include <linux/pci-p2pdma.h>
>  #include <linux/pm_runtime.h>
>
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> -       struct dma_fence **fences;
> -       unsigned int count;
> -       int r;
> -
> -       if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> -               return 0;
> -
> -       r = dma_resv_get_fences(obj, NULL, &count, &fences);
> -       if (r)
> -               return r;
> -
> -       if (count == 0) {
> -               /* Now that was unexpected. */
> -       } else if (count == 1) {
> -               dma_resv_add_excl_fence(obj, fences[0]);
> -               dma_fence_put(fences[0]);
> -               kfree(fences);
> -       } else {
> -               struct dma_fence_array *array;
> -
> -               array = dma_fence_array_create(count, fences,
> -                                              dma_fence_context_alloc(1), 0,
> -                                              false);
> -               if (!array)
> -                       goto err_fences_put;
> -
> -               dma_resv_add_excl_fence(obj, &array->base);
> -               dma_fence_put(&array->base);
> -       }
> -
> -       return 0;
> -
> -err_fences_put:
> -       while (count--)
> -               dma_fence_put(fences[count]);
> -       kfree(fences);
> -       return -ENOMEM;
> -}
> -
>  /**
>   * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>   *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>         if (r < 0)
>                 goto out;
>
> -       r = amdgpu_bo_reserve(bo, false);
> -       if (unlikely(r != 0))
> -               goto out;
> -
> -       /*
> -        * We only create shared fences for internal use, but importers
> -        * of the dmabuf rely on exclusive fences for implicitly
> -        * tracking write hazards. As any of the current fences may
> -        * correspond to a write, we need to convert all existing
> -        * fences on the reservation object into a single exclusive
> -        * fence.
> -        */
> -       r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> -       if (r)
> -               goto out;
> -
> -       bo->prime_shared_count++;
> -       amdgpu_bo_unreserve(bo);
>         return 0;
>
>  out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>         struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>         struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>
> -       if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> -               bo->prime_shared_count--;
> -
>         pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>         pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>  }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>         bo = gem_to_amdgpu_bo(gobj);
>         bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>         bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> -       if (dma_buf->ops != &amdgpu_dmabuf_ops)
> -               bo->prime_shared_count = 1;
>
>         dma_resv_unlock(resv);
>         return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cf4beaf646c..0780e8d18992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>                 break;
>         }
>         case AMDGPU_GEM_OP_SET_PLACEMENT:
> -               if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> +               if (robj->tbo.base.import_attach &&
> +                   args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>                         r = -EINVAL;
>                         amdgpu_bo_unreserve(robj);
>                         break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b7a2070d90af..d13490975ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>                 return -EINVAL;
>
>         /* A shared bo cannot be migrated to VRAM */
> -       if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> +       if (bo->tbo.base.import_attach) {
>                 if (domain & AMDGPU_GEM_DOMAIN_GTT)
>                         domain = AMDGPU_GEM_DOMAIN_GTT;
>                 else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 126df03a7066..c03dfd298f45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -99,7 +99,6 @@ struct amdgpu_bo {
>         struct ttm_buffer_object        tbo;
>         struct ttm_bo_kmap_obj          kmap;
>         u64                             flags;
> -       unsigned                        prime_shared_count;
>         /* per VM structure for page tables and with virtual addresses */
>         struct amdgpu_vm_bo_base        *vm_bo;
>         /* Constant after initialization */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
  2021-06-17 21:09     ` Alex Deucher
@ 2021-06-22  9:20       ` Christian König
  -1 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-22  9:20 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx list, Mailing list - DRI developers

Am 17.06.21 um 23:09 schrieb Alex Deucher:
> On Mon, Jun 14, 2021 at 1:45 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Drop the workaround and instead implement a better solution.
>>
>> Basically we are now chaining all submissions using a dma_fence_chain
>> container and adding them as the exclusive fence to the dma_resv object.
>>
>> This way other drivers can still sync to the single exclusive fence
>> while amdgpu only syncs to fences from different processes.
>>
>> v3: add the shared fence first before the exclusive one
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Series is:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

FYI I've pushed this to drm-misc-next to avoid rebase problems.

Will probably not go upstream before 5.15, so we have plenty of time to 
test this.

Christian.

>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 62 ++++++++++++++++----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c     |  3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h  |  1 -
>>   6 files changed, 55 insertions(+), 79 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> index a130e766cbdb..c905a4cfc173 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>>   struct amdgpu_bo_list_entry {
>>          struct ttm_validate_buffer      tv;
>>          struct amdgpu_bo_va             *bo_va;
>> +       struct dma_fence_chain          *chain;
>>          uint32_t                        priority;
>>          struct page                     **user_pages;
>>          bool                            user_invalidated;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9ce649a1a8d3..25655414e9c0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>                  goto out;
>>          }
>>
>> +       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> +               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> +
>> +               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> +
>> +               if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
>> +                       e->chain = dma_fence_chain_alloc();
>> +                       if (!e->chain) {
>> +                               r = -ENOMEM;
>> +                               goto error_validate;
>> +                       }
>> +               }
>> +       }
>> +
>>          amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>>                                            &p->bytes_moved_vis_threshold);
>>          p->bytes_moved = 0;
>> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>          gws = p->bo_list->gws_obj;
>>          oa = p->bo_list->oa_obj;
>>
>> -       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> -               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> -
>> -               /* Make sure we use the exclusive slot for shared BOs */
>> -               if (bo->prime_shared_count)
>> -                       e->tv.num_shared = 0;
>> -               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> -       }
>> -
>>          if (gds) {
>>                  p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>>                  p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
>> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>          }
>>
>>   error_validate:
>> -       if (r)
>> +       if (r) {
>> +               amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> +                       dma_fence_chain_free(e->chain);
>> +                       e->chain = NULL;
>> +               }
>>                  ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>> +       }
>>   out:
>>          return r;
>>   }
>> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>>   {
>>          unsigned i;
>>
>> -       if (error && backoff)
>> +       if (error && backoff) {
>> +               struct amdgpu_bo_list_entry *e;
>> +
>> +               amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
>> +                       dma_fence_chain_free(e->chain);
>> +                       e->chain = NULL;
>> +               }
>> +
>>                  ttm_eu_backoff_reservation(&parser->ticket,
>>                                             &parser->validated);
>> +       }
>>
>>          for (i = 0; i < parser->num_post_deps; i++) {
>>                  drm_syncobj_put(parser->post_deps[i].syncobj);
>> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>
>>          amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>>
>> +       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> +               struct dma_resv *resv = e->tv.bo->base.resv;
>> +               struct dma_fence_chain *chain = e->chain;
>> +
>> +               if (!chain)
>> +                       continue;
>> +
>> +               /*
>> +                * Work around dma_resv shortcomings by wrapping up the
>> +                * submission in a dma_fence_chain and adding it as the
>> +                * exclusive fence, but first add the submission as a
>> +                * shared fence to make sure that shared fences never
>> +                * signal before the exclusive one.
>> +                */
>> +               dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
>> +                                    dma_fence_get(p->fence), 1);
>> +
>> +               dma_resv_add_shared_fence(resv, p->fence);
>> +               rcu_assign_pointer(resv->fence_excl, &chain->base);
>> +               e->chain = NULL;
>> +       }
>> +
>>          ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>          mutex_unlock(&p->adev->notifier_lock);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> index c3053b83b80c..23219fc3b28c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> @@ -42,48 +42,6 @@
>>   #include <linux/pci-p2pdma.h>
>>   #include <linux/pm_runtime.h>
>>
>> -static int
>> -__dma_resv_make_exclusive(struct dma_resv *obj)
>> -{
>> -       struct dma_fence **fences;
>> -       unsigned int count;
>> -       int r;
>> -
>> -       if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
>> -               return 0;
>> -
>> -       r = dma_resv_get_fences(obj, NULL, &count, &fences);
>> -       if (r)
>> -               return r;
>> -
>> -       if (count == 0) {
>> -               /* Now that was unexpected. */
>> -       } else if (count == 1) {
>> -               dma_resv_add_excl_fence(obj, fences[0]);
>> -               dma_fence_put(fences[0]);
>> -               kfree(fences);
>> -       } else {
>> -               struct dma_fence_array *array;
>> -
>> -               array = dma_fence_array_create(count, fences,
>> -                                              dma_fence_context_alloc(1), 0,
>> -                                              false);
>> -               if (!array)
>> -                       goto err_fences_put;
>> -
>> -               dma_resv_add_excl_fence(obj, &array->base);
>> -               dma_fence_put(&array->base);
>> -       }
>> -
>> -       return 0;
>> -
>> -err_fences_put:
>> -       while (count--)
>> -               dma_fence_put(fences[count]);
>> -       kfree(fences);
>> -       return -ENOMEM;
>> -}
>> -
>>   /**
>>    * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>>    *
>> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>>          if (r < 0)
>>                  goto out;
>>
>> -       r = amdgpu_bo_reserve(bo, false);
>> -       if (unlikely(r != 0))
>> -               goto out;
>> -
>> -       /*
>> -        * We only create shared fences for internal use, but importers
>> -        * of the dmabuf rely on exclusive fences for implicitly
>> -        * tracking write hazards. As any of the current fences may
>> -        * correspond to a write, we need to convert all existing
>> -        * fences on the reservation object into a single exclusive
>> -        * fence.
>> -        */
>> -       r = __dma_resv_make_exclusive(bo->tbo.base.resv);
>> -       if (r)
>> -               goto out;
>> -
>> -       bo->prime_shared_count++;
>> -       amdgpu_bo_unreserve(bo);
>>          return 0;
>>
>>   out:
>> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>>          struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>>          struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>
>> -       if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
>> -               bo->prime_shared_count--;
>> -
>>          pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>>          pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>>   }
>> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>>          bo = gem_to_amdgpu_bo(gobj);
>>          bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>>          bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
>> -       if (dma_buf->ops != &amdgpu_dmabuf_ops)
>> -               bo->prime_shared_count = 1;
>>
>>          dma_resv_unlock(resv);
>>          return gobj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 9cf4beaf646c..0780e8d18992 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>>                  break;
>>          }
>>          case AMDGPU_GEM_OP_SET_PLACEMENT:
>> -               if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
>> +               if (robj->tbo.base.import_attach &&
>> +                   args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>>                          r = -EINVAL;
>>                          amdgpu_bo_unreserve(robj);
>>                          break;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index b7a2070d90af..d13490975ac3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>>                  return -EINVAL;
>>
>>          /* A shared bo cannot be migrated to VRAM */
>> -       if (bo->prime_shared_count || bo->tbo.base.import_attach) {
>> +       if (bo->tbo.base.import_attach) {
>>                  if (domain & AMDGPU_GEM_DOMAIN_GTT)
>>                          domain = AMDGPU_GEM_DOMAIN_GTT;
>>                  else
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 126df03a7066..c03dfd298f45 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -99,7 +99,6 @@ struct amdgpu_bo {
>>          struct ttm_buffer_object        tbo;
>>          struct ttm_bo_kmap_obj          kmap;
>>          u64                             flags;
>> -       unsigned                        prime_shared_count;
>>          /* per VM structure for page tables and with virtual addresses */
>>          struct amdgpu_vm_bo_base        *vm_bo;
>>          /* Constant after initialization */
>> --
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
@ 2021-06-22  9:20       ` Christian König
  0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-22  9:20 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx list, Maling list - DRI developers, Daniel Vetter

On 17.06.21 at 23:09, Alex Deucher wrote:
> On Mon, Jun 14, 2021 at 1:45 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Drop the workaround and instead implement a better solution.
>>
>> Basically we are now chaining all submissions using a dma_fence_chain
>> container and adding them as the exclusive fence to the dma_resv object.
>>
>> This way other drivers can still sync to the single exclusive fence
>> while amdgpu only syncs to fences from different processes.
>>
>> v3: add the shared fence first before the exclusive one
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Series is:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

FYI I've pushed this to drm-misc-next to avoid rebase problems.

It will probably not go upstream before 5.15, so we have plenty of time
to test it.
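
A minimal standalone sketch of the wrapping done in amdgpu_cs_submit()
above, for readers following the thread (illustrative only: the helper
name is made up, and it assumes the dma_resv is already reserved and the
chain node was preallocated with dma_fence_chain_alloc()):

/*
 * Sketch, not from the patch: dma_fence_chain_init() takes over the
 * references to both the previous exclusive fence and the new fence,
 * which is why only the new fence gets an extra reference here.
 */
static void wrap_submission_fence(struct dma_resv *resv,
				  struct dma_fence_chain *chain,
				  struct dma_fence *fence)
{
	/* Chain the previous exclusive fence with the new submission. */
	dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
			     dma_fence_get(fence), 1);

	/* Add the shared fence before publishing the exclusive chain,
	 * so shared fences can never signal before the exclusive one
	 * (the v3 change noted in the commit message). */
	dma_resv_add_shared_fence(resv, fence);
	rcu_assign_pointer(resv->fence_excl, &chain->base);
}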

Christian.


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-06-22  9:20 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-14 17:45 [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence Christian König
2021-06-14 17:45 ` Christian König
2021-06-14 17:45 ` [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3 Christian König
2021-06-14 17:45   ` Christian König
2021-06-17 19:37   ` Daniel Vetter
2021-06-17 19:37     ` Daniel Vetter
2021-06-17 21:09   ` Alex Deucher
2021-06-17 21:09     ` Alex Deucher
2021-06-22  9:20     ` Christian König
2021-06-22  9:20       ` Christian König
2021-06-17  7:44 ` [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence Christian König
2021-06-17  7:44   ` Christian König
2021-06-17 17:30   ` Daniel Vetter
2021-06-17 17:30     ` Daniel Vetter
2021-06-17 19:38     ` Daniel Vetter
2021-06-17 19:38       ` Daniel Vetter
