* [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
@ 2021-06-14 17:45 ` Christian König
0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-14 17:45 UTC (permalink / raw)
To: daniel, dri-devel, amd-gfx
Unwrap the explicit fence if it is a dma_fence_chain and
sync to the first fence not matching the owner rules.
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
1 file changed, 68 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 1b2ceccaf5b0..862eb3c1c4c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -28,6 +28,8 @@
* Christian König <christian.koenig@amd.com>
*/
+#include <linux/dma-fence-chain.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
@@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
return amdgpu_sync_fence(sync, fence);
}
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+ enum amdgpu_sync_mode mode,
+ void *owner, struct dma_fence *f)
+{
+ void *fence_owner = amdgpu_sync_get_owner(f);
+
+ /* Always sync to moves, no matter what */
+ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+ return true;
+
+ /* We only want to trigger KFD eviction fences on
+ * evict or move jobs. Skip KFD fences otherwise.
+ */
+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Never sync to VM updates either. */
+ if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Ignore fences depending on the sync mode */
+ switch (mode) {
+ case AMDGPU_SYNC_ALWAYS:
+ return true;
+
+ case AMDGPU_SYNC_NE_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner == owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EQ_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner != owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EXPLICIT:
+ return false;
+ }
+
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
+ return true;
+}
+
/**
* amdgpu_sync_resv - sync to a reservation object
*
@@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
/* always sync to the exclusive fence */
f = dma_resv_excl_fence(resv);
- r = amdgpu_sync_fence(sync, f);
+ dma_fence_chain_for_each(f, f) {
+ struct dma_fence_chain *chain = to_dma_fence_chain(f);
+
+ if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
+ chain->fence : f)) {
+ r = amdgpu_sync_fence(sync, f);
+ dma_fence_put(f);
+ if (r)
+ return r;
+ break;
+ }
+ }
flist = dma_resv_shared_list(resv);
- if (!flist || r)
- return r;
+ if (!flist)
+ return 0;
for (i = 0; i < flist->shared_count; ++i) {
- void *fence_owner;
-
f = rcu_dereference_protected(flist->shared[i],
dma_resv_held(resv));
- fence_owner = amdgpu_sync_get_owner(f);
-
- /* Always sync to moves, no matter what */
- if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
+ if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
r = amdgpu_sync_fence(sync, f);
if (r)
- break;
- }
-
- /* We only want to trigger KFD eviction fences on
- * evict or move jobs. Skip KFD fences otherwise.
- */
- if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
- continue;
-
- /* Never sync to VM updates either. */
- if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
- continue;
-
- /* Ignore fences depending on the sync mode */
- switch (mode) {
- case AMDGPU_SYNC_ALWAYS:
- break;
-
- case AMDGPU_SYNC_NE_OWNER:
- if (amdgpu_sync_same_dev(adev, f) &&
- fence_owner == owner)
- continue;
- break;
-
- case AMDGPU_SYNC_EQ_OWNER:
- if (amdgpu_sync_same_dev(adev, f) &&
- fence_owner != owner)
- continue;
- break;
-
- case AMDGPU_SYNC_EXPLICIT:
- continue;
+ return r;
}
-
- WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
- "Adding eviction fence to sync obj");
- r = amdgpu_sync_fence(sync, f);
- if (r)
- break;
}
- return r;
+ return 0;
}
/**
--
2.25.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
@ 2021-06-14 17:45 ` Christian König
0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-14 17:45 UTC (permalink / raw)
To: daniel, dri-devel, amd-gfx
Unwrap the explicit fence if it is a dma_fence_chain and
sync to the first fence not matching the owner rules.
Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
1 file changed, 68 insertions(+), 50 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 1b2ceccaf5b0..862eb3c1c4c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -28,6 +28,8 @@
* Christian König <christian.koenig@amd.com>
*/
+#include <linux/dma-fence-chain.h>
+
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
@@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
return amdgpu_sync_fence(sync, fence);
}
+/* Determine based on the owner and mode if we should sync to a fence or not */
+static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
+ enum amdgpu_sync_mode mode,
+ void *owner, struct dma_fence *f)
+{
+ void *fence_owner = amdgpu_sync_get_owner(f);
+
+ /* Always sync to moves, no matter what */
+ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
+ return true;
+
+ /* We only want to trigger KFD eviction fences on
+ * evict or move jobs. Skip KFD fences otherwise.
+ */
+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Never sync to VM updates either. */
+ if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ return false;
+
+ /* Ignore fences depending on the sync mode */
+ switch (mode) {
+ case AMDGPU_SYNC_ALWAYS:
+ return true;
+
+ case AMDGPU_SYNC_NE_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner == owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EQ_OWNER:
+ if (amdgpu_sync_same_dev(adev, f) &&
+ fence_owner != owner)
+ return false;
+ break;
+
+ case AMDGPU_SYNC_EXPLICIT:
+ return false;
+ }
+
+ WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+ "Adding eviction fence to sync obj");
+ return true;
+}
+
/**
* amdgpu_sync_resv - sync to a reservation object
*
@@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
/* always sync to the exclusive fence */
f = dma_resv_excl_fence(resv);
- r = amdgpu_sync_fence(sync, f);
+ dma_fence_chain_for_each(f, f) {
+ struct dma_fence_chain *chain = to_dma_fence_chain(f);
+
+ if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
+ chain->fence : f)) {
+ r = amdgpu_sync_fence(sync, f);
+ dma_fence_put(f);
+ if (r)
+ return r;
+ break;
+ }
+ }
flist = dma_resv_shared_list(resv);
- if (!flist || r)
- return r;
+ if (!flist)
+ return 0;
for (i = 0; i < flist->shared_count; ++i) {
- void *fence_owner;
-
f = rcu_dereference_protected(flist->shared[i],
dma_resv_held(resv));
- fence_owner = amdgpu_sync_get_owner(f);
-
- /* Always sync to moves, no matter what */
- if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
+ if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
r = amdgpu_sync_fence(sync, f);
if (r)
- break;
- }
-
- /* We only want to trigger KFD eviction fences on
- * evict or move jobs. Skip KFD fences otherwise.
- */
- if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
- continue;
-
- /* Never sync to VM updates either. */
- if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
- owner != AMDGPU_FENCE_OWNER_UNDEFINED)
- continue;
-
- /* Ignore fences depending on the sync mode */
- switch (mode) {
- case AMDGPU_SYNC_ALWAYS:
- break;
-
- case AMDGPU_SYNC_NE_OWNER:
- if (amdgpu_sync_same_dev(adev, f) &&
- fence_owner == owner)
- continue;
- break;
-
- case AMDGPU_SYNC_EQ_OWNER:
- if (amdgpu_sync_same_dev(adev, f) &&
- fence_owner != owner)
- continue;
- break;
-
- case AMDGPU_SYNC_EXPLICIT:
- continue;
+ return r;
}
-
- WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
- "Adding eviction fence to sync obj");
- r = amdgpu_sync_fence(sync, f);
- if (r)
- break;
}
- return r;
+ return 0;
}
/**
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
2021-06-14 17:45 ` Christian König
@ 2021-06-14 17:45 ` Christian König
-1 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-14 17:45 UTC (permalink / raw)
To: daniel, dri-devel, amd-gfx
Drop the workaround and instead implement a better solution.
Basically we are now chaining all submissions using a dma_fence_chain
container and adding them as exclusive fence to the dma_resv object.
This way other drivers can still sync to the single exclusive fence
while amdgpu only sync to fences from different processes.
v3: add the shared fence first before the exclusive one
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
6 files changed, 55 insertions(+), 79 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a130e766cbdb..c905a4cfc173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
+ struct dma_fence_chain *chain;
uint32_t priority;
struct page **user_pages;
bool user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9ce649a1a8d3..25655414e9c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out;
}
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+ if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+ e->chain = dma_fence_chain_alloc();
+ if (!e->chain) {
+ r = -ENOMEM;
+ goto error_validate;
+ }
+ }
+ }
+
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
@@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- /* Make sure we use the exclusive slot for shared BOs */
- if (bo->prime_shared_count)
- e->tv.num_shared = 0;
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
- }
-
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
error_validate:
- if (r)
+ if (r) {
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+ }
out:
return r;
}
@@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
+ struct amdgpu_bo_list_entry *e;
+
+ amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
+
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_resv *resv = e->tv.bo->base.resv;
+ struct dma_fence_chain *chain = e->chain;
+
+ if (!chain)
+ continue;
+
+ /*
+ * Work around dma_resv shortcommings by wrapping up the
+ * submission in a dma_fence_chain and add it as exclusive
+ * fence, but first add the submission as shared fence to make
+ * sure that shared fences never signal before the exclusive
+ * one.
+ */
+ dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+ dma_fence_get(p->fence), 1);
+
+ dma_resv_add_shared_fence(resv, p->fence);
+ rcu_assign_pointer(resv->fence_excl, &chain->base);
+ e->chain = NULL;
+ }
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c3053b83b80c..23219fc3b28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,48 +42,6 @@
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
- struct dma_fence **fences;
- unsigned int count;
- int r;
-
- if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
- return 0;
-
- r = dma_resv_get_fences(obj, NULL, &count, &fences);
- if (r)
- return r;
-
- if (count == 0) {
- /* Now that was unexpected. */
- } else if (count == 1) {
- dma_resv_add_excl_fence(obj, fences[0]);
- dma_fence_put(fences[0]);
- kfree(fences);
- } else {
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences,
- dma_fence_context_alloc(1), 0,
- false);
- if (!array)
- goto err_fences_put;
-
- dma_resv_add_excl_fence(obj, &array->base);
- dma_fence_put(&array->base);
- }
-
- return 0;
-
-err_fences_put:
- while (count--)
- dma_fence_put(fences[count]);
- kfree(fences);
- return -ENOMEM;
-}
-
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
*
@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
if (r < 0)
goto out;
- r = amdgpu_bo_reserve(bo, false);
- if (unlikely(r != 0))
- goto out;
-
- /*
- * We only create shared fences for internal use, but importers
- * of the dmabuf rely on exclusive fences for implicitly
- * tracking write hazards. As any of the current fences may
- * correspond to a write, we need to convert all existing
- * fences on the reservation object into a single exclusive
- * fence.
- */
- r = __dma_resv_make_exclusive(bo->tbo.base.resv);
- if (r)
- goto out;
-
- bo->prime_shared_count++;
- amdgpu_bo_unreserve(bo);
return 0;
out:
@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
- bo->prime_shared_count--;
-
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
bo = gem_to_amdgpu_bo(gobj);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (dma_buf->ops != &amdgpu_dmabuf_ops)
- bo->prime_shared_count = 1;
dma_resv_unlock(resv);
return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9cf4beaf646c..0780e8d18992 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
- if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+ if (robj->tbo.base.import_attach &&
+ args->value & AMDGPU_GEM_DOMAIN_VRAM) {
r = -EINVAL;
amdgpu_bo_unreserve(robj);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b7a2070d90af..d13490975ac3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
/* A shared bo cannot be migrated to VRAM */
- if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+ if (bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 126df03a7066..c03dfd298f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -99,7 +99,6 @@ struct amdgpu_bo {
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
u64 flags;
- unsigned prime_shared_count;
/* per VM structure for page tables and with virtual addresses */
struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */
--
2.25.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
@ 2021-06-14 17:45 ` Christian König
0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-14 17:45 UTC (permalink / raw)
To: daniel, dri-devel, amd-gfx
Drop the workaround and instead implement a better solution.
Basically we are now chaining all submissions using a dma_fence_chain
container and adding them as exclusive fence to the dma_resv object.
This way other drivers can still sync to the single exclusive fence
while amdgpu only sync to fences from different processes.
v3: add the shared fence first before the exclusive one
Signed-off-by: Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
6 files changed, 55 insertions(+), 79 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index a130e766cbdb..c905a4cfc173 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
+ struct dma_fence_chain *chain;
uint32_t priority;
struct page **user_pages;
bool user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9ce649a1a8d3..25655414e9c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto out;
}
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+ if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+ e->chain = dma_fence_chain_alloc();
+ if (!e->chain) {
+ r = -ENOMEM;
+ goto error_validate;
+ }
+ }
+ }
+
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
@@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- /* Make sure we use the exclusive slot for shared BOs */
- if (bo->prime_shared_count)
- e->tv.num_shared = 0;
- e->bo_va = amdgpu_vm_bo_find(vm, bo);
- }
-
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
error_validate:
- if (r)
+ if (r) {
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+ }
out:
return r;
}
@@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
+ struct amdgpu_bo_list_entry *e;
+
+ amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+ dma_fence_chain_free(e->chain);
+ e->chain = NULL;
+ }
+
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+ amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+ struct dma_resv *resv = e->tv.bo->base.resv;
+ struct dma_fence_chain *chain = e->chain;
+
+ if (!chain)
+ continue;
+
+ /*
+ * Work around dma_resv shortcommings by wrapping up the
+ * submission in a dma_fence_chain and add it as exclusive
+ * fence, but first add the submission as shared fence to make
+ * sure that shared fences never signal before the exclusive
+ * one.
+ */
+ dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+ dma_fence_get(p->fence), 1);
+
+ dma_resv_add_shared_fence(resv, p->fence);
+ rcu_assign_pointer(resv->fence_excl, &chain->base);
+ e->chain = NULL;
+ }
+
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c3053b83b80c..23219fc3b28c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -42,48 +42,6 @@
#include <linux/pci-p2pdma.h>
#include <linux/pm_runtime.h>
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
- struct dma_fence **fences;
- unsigned int count;
- int r;
-
- if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
- return 0;
-
- r = dma_resv_get_fences(obj, NULL, &count, &fences);
- if (r)
- return r;
-
- if (count == 0) {
- /* Now that was unexpected. */
- } else if (count == 1) {
- dma_resv_add_excl_fence(obj, fences[0]);
- dma_fence_put(fences[0]);
- kfree(fences);
- } else {
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences,
- dma_fence_context_alloc(1), 0,
- false);
- if (!array)
- goto err_fences_put;
-
- dma_resv_add_excl_fence(obj, &array->base);
- dma_fence_put(&array->base);
- }
-
- return 0;
-
-err_fences_put:
- while (count--)
- dma_fence_put(fences[count]);
- kfree(fences);
- return -ENOMEM;
-}
-
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
*
@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
if (r < 0)
goto out;
- r = amdgpu_bo_reserve(bo, false);
- if (unlikely(r != 0))
- goto out;
-
- /*
- * We only create shared fences for internal use, but importers
- * of the dmabuf rely on exclusive fences for implicitly
- * tracking write hazards. As any of the current fences may
- * correspond to a write, we need to convert all existing
- * fences on the reservation object into a single exclusive
- * fence.
- */
- r = __dma_resv_make_exclusive(bo->tbo.base.resv);
- if (r)
- goto out;
-
- bo->prime_shared_count++;
- amdgpu_bo_unreserve(bo);
return 0;
out:
@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
- bo->prime_shared_count--;
-
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
}
@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
bo = gem_to_amdgpu_bo(gobj);
bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
- if (dma_buf->ops != &amdgpu_dmabuf_ops)
- bo->prime_shared_count = 1;
dma_resv_unlock(resv);
return gobj;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9cf4beaf646c..0780e8d18992 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
- if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+ if (robj->tbo.base.import_attach &&
+ args->value & AMDGPU_GEM_DOMAIN_VRAM) {
r = -EINVAL;
amdgpu_bo_unreserve(robj);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b7a2070d90af..d13490975ac3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
/* A shared bo cannot be migrated to VRAM */
- if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+ if (bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)
domain = AMDGPU_GEM_DOMAIN_GTT;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 126df03a7066..c03dfd298f45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -99,7 +99,6 @@ struct amdgpu_bo {
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
u64 flags;
- unsigned prime_shared_count;
/* per VM structure for page tables and with virtual addresses */
struct amdgpu_vm_bo_base *vm_bo;
/* Constant after initialization */
--
2.25.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
2021-06-14 17:45 ` Christian König
@ 2021-06-17 7:44 ` Christian König
-1 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-17 7:44 UTC (permalink / raw)
To: dri-devel, amd-gfx, Alex Deucher
Alex do want to review those so that we can close the ticket?
Thanks,
Christian.
Am 14.06.21 um 19:45 schrieb Christian König:
> Unwrap the explicit fence if it is a dma_fence_chain and
> sync to the first fence not matching the owner rules.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> 1 file changed, 68 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 1b2ceccaf5b0..862eb3c1c4c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -28,6 +28,8 @@
> * Christian König <christian.koenig@amd.com>
> */
>
> +#include <linux/dma-fence-chain.h>
> +
> #include "amdgpu.h"
> #include "amdgpu_trace.h"
> #include "amdgpu_amdkfd.h"
> @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> return amdgpu_sync_fence(sync, fence);
> }
>
> +/* Determine based on the owner and mode if we should sync to a fence or not */
> +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> + enum amdgpu_sync_mode mode,
> + void *owner, struct dma_fence *f)
> +{
> + void *fence_owner = amdgpu_sync_get_owner(f);
> +
> + /* Always sync to moves, no matter what */
> + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> + return true;
> +
> + /* We only want to trigger KFD eviction fences on
> + * evict or move jobs. Skip KFD fences otherwise.
> + */
> + if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> + return false;
> +
> + /* Never sync to VM updates either. */
> + if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> + return false;
> +
> + /* Ignore fences depending on the sync mode */
> + switch (mode) {
> + case AMDGPU_SYNC_ALWAYS:
> + return true;
> +
> + case AMDGPU_SYNC_NE_OWNER:
> + if (amdgpu_sync_same_dev(adev, f) &&
> + fence_owner == owner)
> + return false;
> + break;
> +
> + case AMDGPU_SYNC_EQ_OWNER:
> + if (amdgpu_sync_same_dev(adev, f) &&
> + fence_owner != owner)
> + return false;
> + break;
> +
> + case AMDGPU_SYNC_EXPLICIT:
> + return false;
> + }
> +
> + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> + "Adding eviction fence to sync obj");
> + return true;
> +}
> +
> /**
> * amdgpu_sync_resv - sync to a reservation object
> *
> @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>
> /* always sync to the exclusive fence */
> f = dma_resv_excl_fence(resv);
> - r = amdgpu_sync_fence(sync, f);
> + dma_fence_chain_for_each(f, f) {
> + struct dma_fence_chain *chain = to_dma_fence_chain(f);
> +
> + if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> + chain->fence : f)) {
> + r = amdgpu_sync_fence(sync, f);
> + dma_fence_put(f);
> + if (r)
> + return r;
> + break;
> + }
> + }
>
> flist = dma_resv_shared_list(resv);
> - if (!flist || r)
> - return r;
> + if (!flist)
> + return 0;
>
> for (i = 0; i < flist->shared_count; ++i) {
> - void *fence_owner;
> -
> f = rcu_dereference_protected(flist->shared[i],
> dma_resv_held(resv));
>
> - fence_owner = amdgpu_sync_get_owner(f);
> -
> - /* Always sync to moves, no matter what */
> - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> + if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> r = amdgpu_sync_fence(sync, f);
> if (r)
> - break;
> - }
> -
> - /* We only want to trigger KFD eviction fences on
> - * evict or move jobs. Skip KFD fences otherwise.
> - */
> - if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> - continue;
> -
> - /* Never sync to VM updates either. */
> - if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> - continue;
> -
> - /* Ignore fences depending on the sync mode */
> - switch (mode) {
> - case AMDGPU_SYNC_ALWAYS:
> - break;
> -
> - case AMDGPU_SYNC_NE_OWNER:
> - if (amdgpu_sync_same_dev(adev, f) &&
> - fence_owner == owner)
> - continue;
> - break;
> -
> - case AMDGPU_SYNC_EQ_OWNER:
> - if (amdgpu_sync_same_dev(adev, f) &&
> - fence_owner != owner)
> - continue;
> - break;
> -
> - case AMDGPU_SYNC_EXPLICIT:
> - continue;
> + return r;
> }
> -
> - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> - "Adding eviction fence to sync obj");
> - r = amdgpu_sync_fence(sync, f);
> - if (r)
> - break;
> }
> - return r;
> + return 0;
> }
>
> /**
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
@ 2021-06-17 7:44 ` Christian König
0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-17 7:44 UTC (permalink / raw)
To: dri-devel, amd-gfx, Alex Deucher
Alex do want to review those so that we can close the ticket?
Thanks,
Christian.
Am 14.06.21 um 19:45 schrieb Christian König:
> Unwrap the explicit fence if it is a dma_fence_chain and
> sync to the first fence not matching the owner rules.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> 1 file changed, 68 insertions(+), 50 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 1b2ceccaf5b0..862eb3c1c4c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -28,6 +28,8 @@
> * Christian König <christian.koenig@amd.com>
> */
>
> +#include <linux/dma-fence-chain.h>
> +
> #include "amdgpu.h"
> #include "amdgpu_trace.h"
> #include "amdgpu_amdkfd.h"
> @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> return amdgpu_sync_fence(sync, fence);
> }
>
> +/* Determine based on the owner and mode if we should sync to a fence or not */
> +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> + enum amdgpu_sync_mode mode,
> + void *owner, struct dma_fence *f)
> +{
> + void *fence_owner = amdgpu_sync_get_owner(f);
> +
> + /* Always sync to moves, no matter what */
> + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> + return true;
> +
> + /* We only want to trigger KFD eviction fences on
> + * evict or move jobs. Skip KFD fences otherwise.
> + */
> + if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> + return false;
> +
> + /* Never sync to VM updates either. */
> + if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> + return false;
> +
> + /* Ignore fences depending on the sync mode */
> + switch (mode) {
> + case AMDGPU_SYNC_ALWAYS:
> + return true;
> +
> + case AMDGPU_SYNC_NE_OWNER:
> + if (amdgpu_sync_same_dev(adev, f) &&
> + fence_owner == owner)
> + return false;
> + break;
> +
> + case AMDGPU_SYNC_EQ_OWNER:
> + if (amdgpu_sync_same_dev(adev, f) &&
> + fence_owner != owner)
> + return false;
> + break;
> +
> + case AMDGPU_SYNC_EXPLICIT:
> + return false;
> + }
> +
> + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> + "Adding eviction fence to sync obj");
> + return true;
> +}
> +
> /**
> * amdgpu_sync_resv - sync to a reservation object
> *
> @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>
> /* always sync to the exclusive fence */
> f = dma_resv_excl_fence(resv);
> - r = amdgpu_sync_fence(sync, f);
> + dma_fence_chain_for_each(f, f) {
> + struct dma_fence_chain *chain = to_dma_fence_chain(f);
> +
> + if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> + chain->fence : f)) {
> + r = amdgpu_sync_fence(sync, f);
> + dma_fence_put(f);
> + if (r)
> + return r;
> + break;
> + }
> + }
>
> flist = dma_resv_shared_list(resv);
> - if (!flist || r)
> - return r;
> + if (!flist)
> + return 0;
>
> for (i = 0; i < flist->shared_count; ++i) {
> - void *fence_owner;
> -
> f = rcu_dereference_protected(flist->shared[i],
> dma_resv_held(resv));
>
> - fence_owner = amdgpu_sync_get_owner(f);
> -
> - /* Always sync to moves, no matter what */
> - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> + if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> r = amdgpu_sync_fence(sync, f);
> if (r)
> - break;
> - }
> -
> - /* We only want to trigger KFD eviction fences on
> - * evict or move jobs. Skip KFD fences otherwise.
> - */
> - if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> - continue;
> -
> - /* Never sync to VM updates either. */
> - if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> - continue;
> -
> - /* Ignore fences depending on the sync mode */
> - switch (mode) {
> - case AMDGPU_SYNC_ALWAYS:
> - break;
> -
> - case AMDGPU_SYNC_NE_OWNER:
> - if (amdgpu_sync_same_dev(adev, f) &&
> - fence_owner == owner)
> - continue;
> - break;
> -
> - case AMDGPU_SYNC_EQ_OWNER:
> - if (amdgpu_sync_same_dev(adev, f) &&
> - fence_owner != owner)
> - continue;
> - break;
> -
> - case AMDGPU_SYNC_EXPLICIT:
> - continue;
> + return r;
> }
> -
> - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> - "Adding eviction fence to sync obj");
> - r = amdgpu_sync_fence(sync, f);
> - if (r)
> - break;
> }
> - return r;
> + return 0;
> }
>
> /**
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
2021-06-17 7:44 ` Christian König
@ 2021-06-17 17:30 ` Daniel Vetter
-1 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 17:30 UTC (permalink / raw)
To: Christian König; +Cc: Alex Deucher, amd-gfx, dri-devel
On Thu, Jun 17, 2021 at 09:44:25AM +0200, Christian König wrote:
> Alex do want to review those so that we can close the ticket?
Maybe I'm behind on mails, but 2nd patch still has the issues I think I'm
seeing ...
-Daniel
>
> Thanks,
> Christian.
>
> Am 14.06.21 um 19:45 schrieb Christian König:
> > Unwrap the explicit fence if it is a dma_fence_chain and
> > sync to the first fence not matching the owner rules.
> >
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> > 1 file changed, 68 insertions(+), 50 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > index 1b2ceccaf5b0..862eb3c1c4c5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > @@ -28,6 +28,8 @@
> > * Christian König <christian.koenig@amd.com>
> > */
> > +#include <linux/dma-fence-chain.h>
> > +
> > #include "amdgpu.h"
> > #include "amdgpu_trace.h"
> > #include "amdgpu_amdkfd.h"
> > @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> > return amdgpu_sync_fence(sync, fence);
> > }
> > +/* Determine based on the owner and mode if we should sync to a fence or not */
> > +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> > + enum amdgpu_sync_mode mode,
> > + void *owner, struct dma_fence *f)
> > +{
> > + void *fence_owner = amdgpu_sync_get_owner(f);
> > +
> > + /* Always sync to moves, no matter what */
> > + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> > + return true;
> > +
> > + /* We only want to trigger KFD eviction fences on
> > + * evict or move jobs. Skip KFD fences otherwise.
> > + */
> > + if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > + return false;
> > +
> > + /* Never sync to VM updates either. */
> > + if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > + return false;
> > +
> > + /* Ignore fences depending on the sync mode */
> > + switch (mode) {
> > + case AMDGPU_SYNC_ALWAYS:
> > + return true;
> > +
> > + case AMDGPU_SYNC_NE_OWNER:
> > + if (amdgpu_sync_same_dev(adev, f) &&
> > + fence_owner == owner)
> > + return false;
> > + break;
> > +
> > + case AMDGPU_SYNC_EQ_OWNER:
> > + if (amdgpu_sync_same_dev(adev, f) &&
> > + fence_owner != owner)
> > + return false;
> > + break;
> > +
> > + case AMDGPU_SYNC_EXPLICIT:
> > + return false;
> > + }
> > +
> > + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > + "Adding eviction fence to sync obj");
> > + return true;
> > +}
> > +
> > /**
> > * amdgpu_sync_resv - sync to a reservation object
> > *
> > @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> > /* always sync to the exclusive fence */
> > f = dma_resv_excl_fence(resv);
> > - r = amdgpu_sync_fence(sync, f);
> > + dma_fence_chain_for_each(f, f) {
> > + struct dma_fence_chain *chain = to_dma_fence_chain(f);
> > +
> > + if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> > + chain->fence : f)) {
> > + r = amdgpu_sync_fence(sync, f);
> > + dma_fence_put(f);
> > + if (r)
> > + return r;
> > + break;
> > + }
> > + }
> > flist = dma_resv_shared_list(resv);
> > - if (!flist || r)
> > - return r;
> > + if (!flist)
> > + return 0;
> > for (i = 0; i < flist->shared_count; ++i) {
> > - void *fence_owner;
> > -
> > f = rcu_dereference_protected(flist->shared[i],
> > dma_resv_held(resv));
> > - fence_owner = amdgpu_sync_get_owner(f);
> > -
> > - /* Always sync to moves, no matter what */
> > - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> > + if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> > r = amdgpu_sync_fence(sync, f);
> > if (r)
> > - break;
> > - }
> > -
> > - /* We only want to trigger KFD eviction fences on
> > - * evict or move jobs. Skip KFD fences otherwise.
> > - */
> > - if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > - continue;
> > -
> > - /* Never sync to VM updates either. */
> > - if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > - continue;
> > -
> > - /* Ignore fences depending on the sync mode */
> > - switch (mode) {
> > - case AMDGPU_SYNC_ALWAYS:
> > - break;
> > -
> > - case AMDGPU_SYNC_NE_OWNER:
> > - if (amdgpu_sync_same_dev(adev, f) &&
> > - fence_owner == owner)
> > - continue;
> > - break;
> > -
> > - case AMDGPU_SYNC_EQ_OWNER:
> > - if (amdgpu_sync_same_dev(adev, f) &&
> > - fence_owner != owner)
> > - continue;
> > - break;
> > -
> > - case AMDGPU_SYNC_EXPLICIT:
> > - continue;
> > + return r;
> > }
> > -
> > - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > - "Adding eviction fence to sync obj");
> > - r = amdgpu_sync_fence(sync, f);
> > - if (r)
> > - break;
> > }
> > - return r;
> > + return 0;
> > }
> > /**
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
@ 2021-06-17 17:30 ` Daniel Vetter
0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 17:30 UTC (permalink / raw)
To: Christian König; +Cc: Alex Deucher, amd-gfx, dri-devel
On Thu, Jun 17, 2021 at 09:44:25AM +0200, Christian König wrote:
> Alex do want to review those so that we can close the ticket?
Maybe I'm behind on mails, but 2nd patch still has the issues I think I'm
seeing ...
-Daniel
>
> Thanks,
> Christian.
>
> Am 14.06.21 um 19:45 schrieb Christian König:
> > Unwrap the explicit fence if it is a dma_fence_chain and
> > sync to the first fence not matching the owner rules.
> >
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> > 1 file changed, 68 insertions(+), 50 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > index 1b2ceccaf5b0..862eb3c1c4c5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > @@ -28,6 +28,8 @@
> > * Christian König <christian.koenig@amd.com>
> > */
> > +#include <linux/dma-fence-chain.h>
> > +
> > #include "amdgpu.h"
> > #include "amdgpu_trace.h"
> > #include "amdgpu_amdkfd.h"
> > @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> > return amdgpu_sync_fence(sync, fence);
> > }
> > +/* Determine based on the owner and mode if we should sync to a fence or not */
> > +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> > + enum amdgpu_sync_mode mode,
> > + void *owner, struct dma_fence *f)
> > +{
> > + void *fence_owner = amdgpu_sync_get_owner(f);
> > +
> > + /* Always sync to moves, no matter what */
> > + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> > + return true;
> > +
> > + /* We only want to trigger KFD eviction fences on
> > + * evict or move jobs. Skip KFD fences otherwise.
> > + */
> > + if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > + return false;
> > +
> > + /* Never sync to VM updates either. */
> > + if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > + return false;
> > +
> > + /* Ignore fences depending on the sync mode */
> > + switch (mode) {
> > + case AMDGPU_SYNC_ALWAYS:
> > + return true;
> > +
> > + case AMDGPU_SYNC_NE_OWNER:
> > + if (amdgpu_sync_same_dev(adev, f) &&
> > + fence_owner == owner)
> > + return false;
> > + break;
> > +
> > + case AMDGPU_SYNC_EQ_OWNER:
> > + if (amdgpu_sync_same_dev(adev, f) &&
> > + fence_owner != owner)
> > + return false;
> > + break;
> > +
> > + case AMDGPU_SYNC_EXPLICIT:
> > + return false;
> > + }
> > +
> > + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > + "Adding eviction fence to sync obj");
> > + return true;
> > +}
> > +
> > /**
> > * amdgpu_sync_resv - sync to a reservation object
> > *
> > @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> > /* always sync to the exclusive fence */
> > f = dma_resv_excl_fence(resv);
> > - r = amdgpu_sync_fence(sync, f);
> > + dma_fence_chain_for_each(f, f) {
> > + struct dma_fence_chain *chain = to_dma_fence_chain(f);
> > +
> > + if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> > + chain->fence : f)) {
> > + r = amdgpu_sync_fence(sync, f);
> > + dma_fence_put(f);
> > + if (r)
> > + return r;
> > + break;
> > + }
> > + }
> > flist = dma_resv_shared_list(resv);
> > - if (!flist || r)
> > - return r;
> > + if (!flist)
> > + return 0;
> > for (i = 0; i < flist->shared_count; ++i) {
> > - void *fence_owner;
> > -
> > f = rcu_dereference_protected(flist->shared[i],
> > dma_resv_held(resv));
> > - fence_owner = amdgpu_sync_get_owner(f);
> > -
> > - /* Always sync to moves, no matter what */
> > - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> > + if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> > r = amdgpu_sync_fence(sync, f);
> > if (r)
> > - break;
> > - }
> > -
> > - /* We only want to trigger KFD eviction fences on
> > - * evict or move jobs. Skip KFD fences otherwise.
> > - */
> > - if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > - continue;
> > -
> > - /* Never sync to VM updates either. */
> > - if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > - continue;
> > -
> > - /* Ignore fences depending on the sync mode */
> > - switch (mode) {
> > - case AMDGPU_SYNC_ALWAYS:
> > - break;
> > -
> > - case AMDGPU_SYNC_NE_OWNER:
> > - if (amdgpu_sync_same_dev(adev, f) &&
> > - fence_owner == owner)
> > - continue;
> > - break;
> > -
> > - case AMDGPU_SYNC_EQ_OWNER:
> > - if (amdgpu_sync_same_dev(adev, f) &&
> > - fence_owner != owner)
> > - continue;
> > - break;
> > -
> > - case AMDGPU_SYNC_EXPLICIT:
> > - continue;
> > + return r;
> > }
> > -
> > - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > - "Adding eviction fence to sync obj");
> > - r = amdgpu_sync_fence(sync, f);
> > - if (r)
> > - break;
> > }
> > - return r;
> > + return 0;
> > }
> > /**
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
2021-06-14 17:45 ` Christian König
@ 2021-06-17 19:37 ` Daniel Vetter
-1 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 19:37 UTC (permalink / raw)
To: Christian König; +Cc: amd-gfx, dri-devel
On Mon, Jun 14, 2021 at 07:45:36PM +0200, Christian König wrote:
> Drop the workaround and instead implement a better solution.
>
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as exclusive fence to the dma_resv object.
>
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only sync to fences from different processes.
>
> v3: add the shared fence first before the exclusive one
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
> 6 files changed, 55 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
> struct amdgpu_bo_list_entry {
> struct ttm_validate_buffer tv;
> struct amdgpu_bo_va *bo_va;
> + struct dma_fence_chain *chain;
> uint32_t priority;
> struct page **user_pages;
> bool user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9ce649a1a8d3..25655414e9c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> goto out;
> }
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> + e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> + e->chain = dma_fence_chain_alloc();
> + if (!e->chain) {
> + r = -ENOMEM;
> + goto error_validate;
> + }
> + }
> + }
> +
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> &p->bytes_moved_vis_threshold);
> p->bytes_moved = 0;
> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> - /* Make sure we use the exclusive slot for shared BOs */
> - if (bo->prime_shared_count)
> - e->tv.num_shared = 0;
> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
> - }
> -
> if (gds) {
> p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
> p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> }
>
> error_validate:
> - if (r)
> + if (r) {
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> + }
> out:
> return r;
> }
> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> {
> unsigned i;
>
> - if (error && backoff)
> + if (error && backoff) {
> + struct amdgpu_bo_list_entry *e;
> +
> + amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> +
> ttm_eu_backoff_reservation(&parser->ticket,
> &parser->validated);
> + }
>
> for (i = 0; i < parser->num_post_deps; i++) {
> drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct dma_resv *resv = e->tv.bo->base.resv;
> + struct dma_fence_chain *chain = e->chain;
> +
> + if (!chain)
> + continue;
> +
> + /*
> + * Work around dma_resv shortcommings by wrapping up the
> + * submission in a dma_fence_chain and add it as exclusive
> + * fence, but first add the submission as shared fence to make
> + * sure that shared fences never signal before the exclusive
> + * one.
> + */
> + dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> + dma_fence_get(p->fence), 1);
> +
> + dma_resv_add_shared_fence(resv, p->fence);
> + rcu_assign_pointer(resv->fence_excl, &chain->base);
Ok I should have looked more carefully, +/- memory barriers this looks
correct.
So my idea behind the dma_resv_add_shared_exclusive_fence helper was that
it would give us more flexibility for changing the internals. Since I do
like your idea from a few weeks back of just storing exclusive fences as
shared ones, but with a marker, and then the interface functions picking
the right set of fences depending what the caller wants. But for that we'd
need to drop a lot more abstraction of the internals into dma_resv and
make sure everyone is using it.
Because looking at drivers there's indeed both the concept of a truly
exclusive fence, and the "it's actually a shared fence but stuff it into
the exclusive fence slot for implicit sync" variety. And from what I can
see, everyone gets it wrong, but in lots of different ways.
Hence the wrapper for this concept, and then making sure we roll it out to
all the drivers. I think all the ones with a NO_IMPLICIT flag (or
equivalent, i915 and msm have it the other way round iirc) would need this
to not break the dma_resv contract too badly.
But also I guess we can bikeshed this later on even more.
On this: Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> + e->chain = NULL;
> + }
> +
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> mutex_unlock(&p->adev->notifier_lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
> #include <linux/pci-p2pdma.h>
> #include <linux/pm_runtime.h>
>
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> - struct dma_fence **fences;
> - unsigned int count;
> - int r;
> -
> - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> - return 0;
> -
> - r = dma_resv_get_fences(obj, NULL, &count, &fences);
> - if (r)
> - return r;
> -
> - if (count == 0) {
> - /* Now that was unexpected. */
> - } else if (count == 1) {
> - dma_resv_add_excl_fence(obj, fences[0]);
> - dma_fence_put(fences[0]);
> - kfree(fences);
> - } else {
> - struct dma_fence_array *array;
> -
> - array = dma_fence_array_create(count, fences,
> - dma_fence_context_alloc(1), 0,
> - false);
> - if (!array)
> - goto err_fences_put;
> -
> - dma_resv_add_excl_fence(obj, &array->base);
> - dma_fence_put(&array->base);
> - }
> -
> - return 0;
> -
> -err_fences_put:
> - while (count--)
> - dma_fence_put(fences[count]);
> - kfree(fences);
> - return -ENOMEM;
> -}
> -
> /**
> * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
> *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
> if (r < 0)
> goto out;
>
> - r = amdgpu_bo_reserve(bo, false);
> - if (unlikely(r != 0))
> - goto out;
> -
> - /*
> - * We only create shared fences for internal use, but importers
> - * of the dmabuf rely on exclusive fences for implicitly
> - * tracking write hazards. As any of the current fences may
> - * correspond to a write, we need to convert all existing
> - * fences on the reservation object into a single exclusive
> - * fence.
> - */
> - r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> - if (r)
> - goto out;
> -
> - bo->prime_shared_count++;
> - amdgpu_bo_unreserve(bo);
> return 0;
>
> out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>
> - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> - bo->prime_shared_count--;
> -
> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
> bo = gem_to_amdgpu_bo(gobj);
> bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
> bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
> - bo->prime_shared_count = 1;
>
> dma_resv_unlock(resv);
> return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cf4beaf646c..0780e8d18992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
> break;
> }
> case AMDGPU_GEM_OP_SET_PLACEMENT:
> - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> + if (robj->tbo.base.import_attach &&
> + args->value & AMDGPU_GEM_DOMAIN_VRAM) {
> r = -EINVAL;
> amdgpu_bo_unreserve(robj);
> break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b7a2070d90af..d13490975ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
> return -EINVAL;
>
> /* A shared bo cannot be migrated to VRAM */
> - if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> + if (bo->tbo.base.import_attach) {
> if (domain & AMDGPU_GEM_DOMAIN_GTT)
> domain = AMDGPU_GEM_DOMAIN_GTT;
> else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 126df03a7066..c03dfd298f45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -99,7 +99,6 @@ struct amdgpu_bo {
> struct ttm_buffer_object tbo;
> struct ttm_bo_kmap_obj kmap;
> u64 flags;
> - unsigned prime_shared_count;
> /* per VM structure for page tables and with virtual addresses */
> struct amdgpu_vm_bo_base *vm_bo;
> /* Constant after initialization */
> --
> 2.25.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
@ 2021-06-17 19:37 ` Daniel Vetter
0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 19:37 UTC (permalink / raw)
To: Christian König; +Cc: amd-gfx, dri-devel, daniel
On Mon, Jun 14, 2021 at 07:45:36PM +0200, Christian König wrote:
> Drop the workaround and instead implement a better solution.
>
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as exclusive fence to the dma_resv object.
>
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only sync to fences from different processes.
>
> v3: add the shared fence first before the exclusive one
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
> 6 files changed, 55 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
> struct amdgpu_bo_list_entry {
> struct ttm_validate_buffer tv;
> struct amdgpu_bo_va *bo_va;
> + struct dma_fence_chain *chain;
> uint32_t priority;
> struct page **user_pages;
> bool user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9ce649a1a8d3..25655414e9c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> goto out;
> }
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> + e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> + e->chain = dma_fence_chain_alloc();
> + if (!e->chain) {
> + r = -ENOMEM;
> + goto error_validate;
> + }
> + }
> + }
> +
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> &p->bytes_moved_vis_threshold);
> p->bytes_moved = 0;
> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> - /* Make sure we use the exclusive slot for shared BOs */
> - if (bo->prime_shared_count)
> - e->tv.num_shared = 0;
> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
> - }
> -
> if (gds) {
> p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
> p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> }
>
> error_validate:
> - if (r)
> + if (r) {
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> + }
> out:
> return r;
> }
> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> {
> unsigned i;
>
> - if (error && backoff)
> + if (error && backoff) {
> + struct amdgpu_bo_list_entry *e;
> +
> + amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> +
> ttm_eu_backoff_reservation(&parser->ticket,
> &parser->validated);
> + }
>
> for (i = 0; i < parser->num_post_deps; i++) {
> drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct dma_resv *resv = e->tv.bo->base.resv;
> + struct dma_fence_chain *chain = e->chain;
> +
> + if (!chain)
> + continue;
> +
> + /*
> + * Work around dma_resv shortcommings by wrapping up the
> + * submission in a dma_fence_chain and add it as exclusive
> + * fence, but first add the submission as shared fence to make
> + * sure that shared fences never signal before the exclusive
> + * one.
> + */
> + dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> + dma_fence_get(p->fence), 1);
> +
> + dma_resv_add_shared_fence(resv, p->fence);
> + rcu_assign_pointer(resv->fence_excl, &chain->base);
Ok I should have looked more carefully, +/- memory barriers this looks
correct.
So my idea behind the dma_resv_add_shared_exclusive_fence helper was that
it would give us more flexibility for changing the internals. Since I do
like your idea from a few weeks back of just storing exclusive fences as
shared ones, but with a marker, and then the interface functions picking
the right set of fences depending what the caller wants. But for that we'd
need to drop a lot more abstraction of the internals into dma_resv and
make sure everyone is using it.
Because looking at drivers there's indeed both the concept of a truly
exclusive fence, and the "it's actually a shared fence but stuff it into
the exclusive fence slot for implicit sync" variety. And from what I can
see, everyone gets it wrong, but in lots of different ways.
Hence the wrapper for this concept, and then making sure we roll it out to
all the drivers. I think all the ones with a NO_IMPLICIT flag (or
equivalent, i915 and msm have it the other way round iirc) would need this
to not break the dma_resv contract too badly.
But also I guess we can bikeshed this later on even more.
On this: Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> + e->chain = NULL;
> + }
> +
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> mutex_unlock(&p->adev->notifier_lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
> #include <linux/pci-p2pdma.h>
> #include <linux/pm_runtime.h>
>
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> - struct dma_fence **fences;
> - unsigned int count;
> - int r;
> -
> - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> - return 0;
> -
> - r = dma_resv_get_fences(obj, NULL, &count, &fences);
> - if (r)
> - return r;
> -
> - if (count == 0) {
> - /* Now that was unexpected. */
> - } else if (count == 1) {
> - dma_resv_add_excl_fence(obj, fences[0]);
> - dma_fence_put(fences[0]);
> - kfree(fences);
> - } else {
> - struct dma_fence_array *array;
> -
> - array = dma_fence_array_create(count, fences,
> - dma_fence_context_alloc(1), 0,
> - false);
> - if (!array)
> - goto err_fences_put;
> -
> - dma_resv_add_excl_fence(obj, &array->base);
> - dma_fence_put(&array->base);
> - }
> -
> - return 0;
> -
> -err_fences_put:
> - while (count--)
> - dma_fence_put(fences[count]);
> - kfree(fences);
> - return -ENOMEM;
> -}
> -
> /**
> * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
> *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
> if (r < 0)
> goto out;
>
> - r = amdgpu_bo_reserve(bo, false);
> - if (unlikely(r != 0))
> - goto out;
> -
> - /*
> - * We only create shared fences for internal use, but importers
> - * of the dmabuf rely on exclusive fences for implicitly
> - * tracking write hazards. As any of the current fences may
> - * correspond to a write, we need to convert all existing
> - * fences on the reservation object into a single exclusive
> - * fence.
> - */
> - r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> - if (r)
> - goto out;
> -
> - bo->prime_shared_count++;
> - amdgpu_bo_unreserve(bo);
> return 0;
>
> out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>
> - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> - bo->prime_shared_count--;
> -
> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
> bo = gem_to_amdgpu_bo(gobj);
> bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
> bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
> - bo->prime_shared_count = 1;
>
> dma_resv_unlock(resv);
> return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cf4beaf646c..0780e8d18992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
> break;
> }
> case AMDGPU_GEM_OP_SET_PLACEMENT:
> - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> + if (robj->tbo.base.import_attach &&
> + args->value & AMDGPU_GEM_DOMAIN_VRAM) {
> r = -EINVAL;
> amdgpu_bo_unreserve(robj);
> break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b7a2070d90af..d13490975ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
> return -EINVAL;
>
> /* A shared bo cannot be migrated to VRAM */
> - if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> + if (bo->tbo.base.import_attach) {
> if (domain & AMDGPU_GEM_DOMAIN_GTT)
> domain = AMDGPU_GEM_DOMAIN_GTT;
> else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 126df03a7066..c03dfd298f45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -99,7 +99,6 @@ struct amdgpu_bo {
> struct ttm_buffer_object tbo;
> struct ttm_bo_kmap_obj kmap;
> u64 flags;
> - unsigned prime_shared_count;
> /* per VM structure for page tables and with virtual addresses */
> struct amdgpu_vm_bo_base *vm_bo;
> /* Constant after initialization */
> --
> 2.25.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
2021-06-17 17:30 ` Daniel Vetter
@ 2021-06-17 19:38 ` Daniel Vetter
-1 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 19:38 UTC (permalink / raw)
To: Christian König; +Cc: Alex Deucher, amd-gfx, dri-devel
On Thu, Jun 17, 2021 at 07:30:24PM +0200, Daniel Vetter wrote:
> On Thu, Jun 17, 2021 at 09:44:25AM +0200, Christian König wrote:
> > Alex do want to review those so that we can close the ticket?
>
> Maybe I'm behind on mails, but 2nd patch still has the issues I think I'm
> seeing ...
Ok with temperatures getting colder towards the night the 2nd patch looks
much better now :-) I replied there.
-Daniel
> -Daniel
>
> >
> > Thanks,
> > Christian.
> >
> > Am 14.06.21 um 19:45 schrieb Christian König:
> > > Unwrap the explicit fence if it is a dma_fence_chain and
> > > sync to the first fence not matching the owner rules.
> > >
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > ---
> > > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> > > 1 file changed, 68 insertions(+), 50 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > index 1b2ceccaf5b0..862eb3c1c4c5 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > @@ -28,6 +28,8 @@
> > > * Christian König <christian.koenig@amd.com>
> > > */
> > > +#include <linux/dma-fence-chain.h>
> > > +
> > > #include "amdgpu.h"
> > > #include "amdgpu_trace.h"
> > > #include "amdgpu_amdkfd.h"
> > > @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> > > return amdgpu_sync_fence(sync, fence);
> > > }
> > > +/* Determine based on the owner and mode if we should sync to a fence or not */
> > > +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> > > + enum amdgpu_sync_mode mode,
> > > + void *owner, struct dma_fence *f)
> > > +{
> > > + void *fence_owner = amdgpu_sync_get_owner(f);
> > > +
> > > + /* Always sync to moves, no matter what */
> > > + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> > > + return true;
> > > +
> > > + /* We only want to trigger KFD eviction fences on
> > > + * evict or move jobs. Skip KFD fences otherwise.
> > > + */
> > > + if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > + return false;
> > > +
> > > + /* Never sync to VM updates either. */
> > > + if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > + return false;
> > > +
> > > + /* Ignore fences depending on the sync mode */
> > > + switch (mode) {
> > > + case AMDGPU_SYNC_ALWAYS:
> > > + return true;
> > > +
> > > + case AMDGPU_SYNC_NE_OWNER:
> > > + if (amdgpu_sync_same_dev(adev, f) &&
> > > + fence_owner == owner)
> > > + return false;
> > > + break;
> > > +
> > > + case AMDGPU_SYNC_EQ_OWNER:
> > > + if (amdgpu_sync_same_dev(adev, f) &&
> > > + fence_owner != owner)
> > > + return false;
> > > + break;
> > > +
> > > + case AMDGPU_SYNC_EXPLICIT:
> > > + return false;
> > > + }
> > > +
> > > + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > > + "Adding eviction fence to sync obj");
> > > + return true;
> > > +}
> > > +
> > > /**
> > > * amdgpu_sync_resv - sync to a reservation object
> > > *
> > > @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> > > /* always sync to the exclusive fence */
> > > f = dma_resv_excl_fence(resv);
> > > - r = amdgpu_sync_fence(sync, f);
> > > + dma_fence_chain_for_each(f, f) {
> > > + struct dma_fence_chain *chain = to_dma_fence_chain(f);
> > > +
> > > + if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> > > + chain->fence : f)) {
> > > + r = amdgpu_sync_fence(sync, f);
> > > + dma_fence_put(f);
> > > + if (r)
> > > + return r;
> > > + break;
> > > + }
> > > + }
> > > flist = dma_resv_shared_list(resv);
> > > - if (!flist || r)
> > > - return r;
> > > + if (!flist)
> > > + return 0;
> > > for (i = 0; i < flist->shared_count; ++i) {
> > > - void *fence_owner;
> > > -
> > > f = rcu_dereference_protected(flist->shared[i],
> > > dma_resv_held(resv));
> > > - fence_owner = amdgpu_sync_get_owner(f);
> > > -
> > > - /* Always sync to moves, no matter what */
> > > - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> > > + if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> > > r = amdgpu_sync_fence(sync, f);
> > > if (r)
> > > - break;
> > > - }
> > > -
> > > - /* We only want to trigger KFD eviction fences on
> > > - * evict or move jobs. Skip KFD fences otherwise.
> > > - */
> > > - if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > - continue;
> > > -
> > > - /* Never sync to VM updates either. */
> > > - if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > - continue;
> > > -
> > > - /* Ignore fences depending on the sync mode */
> > > - switch (mode) {
> > > - case AMDGPU_SYNC_ALWAYS:
> > > - break;
> > > -
> > > - case AMDGPU_SYNC_NE_OWNER:
> > > - if (amdgpu_sync_same_dev(adev, f) &&
> > > - fence_owner == owner)
> > > - continue;
> > > - break;
> > > -
> > > - case AMDGPU_SYNC_EQ_OWNER:
> > > - if (amdgpu_sync_same_dev(adev, f) &&
> > > - fence_owner != owner)
> > > - continue;
> > > - break;
> > > -
> > > - case AMDGPU_SYNC_EXPLICIT:
> > > - continue;
> > > + return r;
> > > }
> > > -
> > > - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > > - "Adding eviction fence to sync obj");
> > > - r = amdgpu_sync_fence(sync, f);
> > > - if (r)
> > > - break;
> > > }
> > > - return r;
> > > + return 0;
> > > }
> > > /**
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence
@ 2021-06-17 19:38 ` Daniel Vetter
0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-06-17 19:38 UTC (permalink / raw)
To: Christian König; +Cc: Alex Deucher, amd-gfx, dri-devel
On Thu, Jun 17, 2021 at 07:30:24PM +0200, Daniel Vetter wrote:
> On Thu, Jun 17, 2021 at 09:44:25AM +0200, Christian König wrote:
> > Alex do want to review those so that we can close the ticket?
>
> Maybe I'm behind on mails, but 2nd patch still has the issues I think I'm
> seeing ...
Ok with temperatures getting colder towards the night the 2nd patch looks
much better now :-) I replied there.
-Daniel
> -Daniel
>
> >
> > Thanks,
> > Christian.
> >
> > Am 14.06.21 um 19:45 schrieb Christian König:
> > > Unwrap the explicit fence if it is a dma_fence_chain and
> > > sync to the first fence not matching the owner rules.
> > >
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > ---
> > > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 118 +++++++++++++----------
> > > 1 file changed, 68 insertions(+), 50 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > index 1b2ceccaf5b0..862eb3c1c4c5 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> > > @@ -28,6 +28,8 @@
> > > * Christian König <christian.koenig@amd.com>
> > > */
> > > +#include <linux/dma-fence-chain.h>
> > > +
> > > #include "amdgpu.h"
> > > #include "amdgpu_trace.h"
> > > #include "amdgpu_amdkfd.h"
> > > @@ -186,6 +188,55 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
> > > return amdgpu_sync_fence(sync, fence);
> > > }
> > > +/* Determine based on the owner and mode if we should sync to a fence or not */
> > > +static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
> > > + enum amdgpu_sync_mode mode,
> > > + void *owner, struct dma_fence *f)
> > > +{
> > > + void *fence_owner = amdgpu_sync_get_owner(f);
> > > +
> > > + /* Always sync to moves, no matter what */
> > > + if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
> > > + return true;
> > > +
> > > + /* We only want to trigger KFD eviction fences on
> > > + * evict or move jobs. Skip KFD fences otherwise.
> > > + */
> > > + if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > + return false;
> > > +
> > > + /* Never sync to VM updates either. */
> > > + if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > > + owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > + return false;
> > > +
> > > + /* Ignore fences depending on the sync mode */
> > > + switch (mode) {
> > > + case AMDGPU_SYNC_ALWAYS:
> > > + return true;
> > > +
> > > + case AMDGPU_SYNC_NE_OWNER:
> > > + if (amdgpu_sync_same_dev(adev, f) &&
> > > + fence_owner == owner)
> > > + return false;
> > > + break;
> > > +
> > > + case AMDGPU_SYNC_EQ_OWNER:
> > > + if (amdgpu_sync_same_dev(adev, f) &&
> > > + fence_owner != owner)
> > > + return false;
> > > + break;
> > > +
> > > + case AMDGPU_SYNC_EXPLICIT:
> > > + return false;
> > > + }
> > > +
> > > + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > > + "Adding eviction fence to sync obj");
> > > + return true;
> > > +}
> > > +
> > > /**
> > > * amdgpu_sync_resv - sync to a reservation object
> > > *
> > > @@ -211,67 +262,34 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> > > /* always sync to the exclusive fence */
> > > f = dma_resv_excl_fence(resv);
> > > - r = amdgpu_sync_fence(sync, f);
> > > + dma_fence_chain_for_each(f, f) {
> > > + struct dma_fence_chain *chain = to_dma_fence_chain(f);
> > > +
> > > + if (amdgpu_sync_test_fence(adev, mode, owner, chain ?
> > > + chain->fence : f)) {
> > > + r = amdgpu_sync_fence(sync, f);
> > > + dma_fence_put(f);
> > > + if (r)
> > > + return r;
> > > + break;
> > > + }
> > > + }
> > > flist = dma_resv_shared_list(resv);
> > > - if (!flist || r)
> > > - return r;
> > > + if (!flist)
> > > + return 0;
> > > for (i = 0; i < flist->shared_count; ++i) {
> > > - void *fence_owner;
> > > -
> > > f = rcu_dereference_protected(flist->shared[i],
> > > dma_resv_held(resv));
> > > - fence_owner = amdgpu_sync_get_owner(f);
> > > -
> > > - /* Always sync to moves, no matter what */
> > > - if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) {
> > > + if (amdgpu_sync_test_fence(adev, mode, owner, f)) {
> > > r = amdgpu_sync_fence(sync, f);
> > > if (r)
> > > - break;
> > > - }
> > > -
> > > - /* We only want to trigger KFD eviction fences on
> > > - * evict or move jobs. Skip KFD fences otherwise.
> > > - */
> > > - if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
> > > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > - continue;
> > > -
> > > - /* Never sync to VM updates either. */
> > > - if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
> > > - owner != AMDGPU_FENCE_OWNER_UNDEFINED)
> > > - continue;
> > > -
> > > - /* Ignore fences depending on the sync mode */
> > > - switch (mode) {
> > > - case AMDGPU_SYNC_ALWAYS:
> > > - break;
> > > -
> > > - case AMDGPU_SYNC_NE_OWNER:
> > > - if (amdgpu_sync_same_dev(adev, f) &&
> > > - fence_owner == owner)
> > > - continue;
> > > - break;
> > > -
> > > - case AMDGPU_SYNC_EQ_OWNER:
> > > - if (amdgpu_sync_same_dev(adev, f) &&
> > > - fence_owner != owner)
> > > - continue;
> > > - break;
> > > -
> > > - case AMDGPU_SYNC_EXPLICIT:
> > > - continue;
> > > + return r;
> > > }
> > > -
> > > - WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> > > - "Adding eviction fence to sync obj");
> > > - r = amdgpu_sync_fence(sync, f);
> > > - if (r)
> > > - break;
> > > }
> > > - return r;
> > > + return 0;
> > > }
> > > /**
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
2021-06-14 17:45 ` Christian König
@ 2021-06-17 21:09 ` Alex Deucher
-1 siblings, 0 replies; 16+ messages in thread
From: Alex Deucher @ 2021-06-17 21:09 UTC (permalink / raw)
To: Christian König; +Cc: amd-gfx list, Maling list - DRI developers
On Mon, Jun 14, 2021 at 1:45 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Drop the workaround and instead implement a better solution.
>
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as exclusive fence to the dma_resv object.
>
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only sync to fences from different processes.
>
> v3: add the shared fence first before the exclusive one
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Series is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
> 6 files changed, 55 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
> struct amdgpu_bo_list_entry {
> struct ttm_validate_buffer tv;
> struct amdgpu_bo_va *bo_va;
> + struct dma_fence_chain *chain;
> uint32_t priority;
> struct page **user_pages;
> bool user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9ce649a1a8d3..25655414e9c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> goto out;
> }
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> + e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> + e->chain = dma_fence_chain_alloc();
> + if (!e->chain) {
> + r = -ENOMEM;
> + goto error_validate;
> + }
> + }
> + }
> +
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> &p->bytes_moved_vis_threshold);
> p->bytes_moved = 0;
> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> - /* Make sure we use the exclusive slot for shared BOs */
> - if (bo->prime_shared_count)
> - e->tv.num_shared = 0;
> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
> - }
> -
> if (gds) {
> p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
> p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> }
>
> error_validate:
> - if (r)
> + if (r) {
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> + }
> out:
> return r;
> }
> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> {
> unsigned i;
>
> - if (error && backoff)
> + if (error && backoff) {
> + struct amdgpu_bo_list_entry *e;
> +
> + amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> +
> ttm_eu_backoff_reservation(&parser->ticket,
> &parser->validated);
> + }
>
> for (i = 0; i < parser->num_post_deps; i++) {
> drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct dma_resv *resv = e->tv.bo->base.resv;
> + struct dma_fence_chain *chain = e->chain;
> +
> + if (!chain)
> + continue;
> +
> + /*
> + * Work around dma_resv shortcommings by wrapping up the
> + * submission in a dma_fence_chain and add it as exclusive
> + * fence, but first add the submission as shared fence to make
> + * sure that shared fences never signal before the exclusive
> + * one.
> + */
> + dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> + dma_fence_get(p->fence), 1);
> +
> + dma_resv_add_shared_fence(resv, p->fence);
> + rcu_assign_pointer(resv->fence_excl, &chain->base);
> + e->chain = NULL;
> + }
> +
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> mutex_unlock(&p->adev->notifier_lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
> #include <linux/pci-p2pdma.h>
> #include <linux/pm_runtime.h>
>
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> - struct dma_fence **fences;
> - unsigned int count;
> - int r;
> -
> - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> - return 0;
> -
> - r = dma_resv_get_fences(obj, NULL, &count, &fences);
> - if (r)
> - return r;
> -
> - if (count == 0) {
> - /* Now that was unexpected. */
> - } else if (count == 1) {
> - dma_resv_add_excl_fence(obj, fences[0]);
> - dma_fence_put(fences[0]);
> - kfree(fences);
> - } else {
> - struct dma_fence_array *array;
> -
> - array = dma_fence_array_create(count, fences,
> - dma_fence_context_alloc(1), 0,
> - false);
> - if (!array)
> - goto err_fences_put;
> -
> - dma_resv_add_excl_fence(obj, &array->base);
> - dma_fence_put(&array->base);
> - }
> -
> - return 0;
> -
> -err_fences_put:
> - while (count--)
> - dma_fence_put(fences[count]);
> - kfree(fences);
> - return -ENOMEM;
> -}
> -
> /**
> * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
> *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
> if (r < 0)
> goto out;
>
> - r = amdgpu_bo_reserve(bo, false);
> - if (unlikely(r != 0))
> - goto out;
> -
> - /*
> - * We only create shared fences for internal use, but importers
> - * of the dmabuf rely on exclusive fences for implicitly
> - * tracking write hazards. As any of the current fences may
> - * correspond to a write, we need to convert all existing
> - * fences on the reservation object into a single exclusive
> - * fence.
> - */
> - r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> - if (r)
> - goto out;
> -
> - bo->prime_shared_count++;
> - amdgpu_bo_unreserve(bo);
> return 0;
>
> out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>
> - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> - bo->prime_shared_count--;
> -
> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
> bo = gem_to_amdgpu_bo(gobj);
> bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
> bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
> - bo->prime_shared_count = 1;
>
> dma_resv_unlock(resv);
> return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cf4beaf646c..0780e8d18992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
> break;
> }
> case AMDGPU_GEM_OP_SET_PLACEMENT:
> - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> + if (robj->tbo.base.import_attach &&
> + args->value & AMDGPU_GEM_DOMAIN_VRAM) {
> r = -EINVAL;
> amdgpu_bo_unreserve(robj);
> break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b7a2070d90af..d13490975ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
> return -EINVAL;
>
> /* A shared bo cannot be migrated to VRAM */
> - if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> + if (bo->tbo.base.import_attach) {
> if (domain & AMDGPU_GEM_DOMAIN_GTT)
> domain = AMDGPU_GEM_DOMAIN_GTT;
> else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 126df03a7066..c03dfd298f45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -99,7 +99,6 @@ struct amdgpu_bo {
> struct ttm_buffer_object tbo;
> struct ttm_bo_kmap_obj kmap;
> u64 flags;
> - unsigned prime_shared_count;
> /* per VM structure for page tables and with virtual addresses */
> struct amdgpu_vm_bo_base *vm_bo;
> /* Constant after initialization */
> --
> 2.25.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
@ 2021-06-17 21:09 ` Alex Deucher
0 siblings, 0 replies; 16+ messages in thread
From: Alex Deucher @ 2021-06-17 21:09 UTC (permalink / raw)
To: Christian König
Cc: amd-gfx list, Maling list - DRI developers, Daniel Vetter
On Mon, Jun 14, 2021 at 1:45 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Drop the workaround and instead implement a better solution.
>
> Basically we are now chaining all submissions using a dma_fence_chain
> container and adding them as exclusive fence to the dma_resv object.
>
> This way other drivers can still sync to the single exclusive fence
> while amdgpu only sync to fences from different processes.
>
> v3: add the shared fence first before the exclusive one
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Series is:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
> 6 files changed, 55 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index a130e766cbdb..c905a4cfc173 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
> struct amdgpu_bo_list_entry {
> struct ttm_validate_buffer tv;
> struct amdgpu_bo_va *bo_va;
> + struct dma_fence_chain *chain;
> uint32_t priority;
> struct page **user_pages;
> bool user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9ce649a1a8d3..25655414e9c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> goto out;
> }
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> +
> + e->bo_va = amdgpu_vm_bo_find(vm, bo);
> +
> + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> + e->chain = dma_fence_chain_alloc();
> + if (!e->chain) {
> + r = -ENOMEM;
> + goto error_validate;
> + }
> + }
> + }
> +
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> &p->bytes_moved_vis_threshold);
> p->bytes_moved = 0;
> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> gws = p->bo_list->gws_obj;
> oa = p->bo_list->oa_obj;
>
> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> - /* Make sure we use the exclusive slot for shared BOs */
> - if (bo->prime_shared_count)
> - e->tv.num_shared = 0;
> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
> - }
> -
> if (gds) {
> p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
> p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> }
>
> error_validate:
> - if (r)
> + if (r) {
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> + }
> out:
> return r;
> }
> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> {
> unsigned i;
>
> - if (error && backoff)
> + if (error && backoff) {
> + struct amdgpu_bo_list_entry *e;
> +
> + amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> + dma_fence_chain_free(e->chain);
> + e->chain = NULL;
> + }
> +
> ttm_eu_backoff_reservation(&parser->ticket,
> &parser->validated);
> + }
>
> for (i = 0; i < parser->num_post_deps; i++) {
> drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> + struct dma_resv *resv = e->tv.bo->base.resv;
> + struct dma_fence_chain *chain = e->chain;
> +
> + if (!chain)
> + continue;
> +
> + /*
> + * Work around dma_resv shortcommings by wrapping up the
> + * submission in a dma_fence_chain and add it as exclusive
> + * fence, but first add the submission as shared fence to make
> + * sure that shared fences never signal before the exclusive
> + * one.
> + */
> + dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
> + dma_fence_get(p->fence), 1);
> +
> + dma_resv_add_shared_fence(resv, p->fence);
> + rcu_assign_pointer(resv->fence_excl, &chain->base);
> + e->chain = NULL;
> + }
> +
> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> mutex_unlock(&p->adev->notifier_lock);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index c3053b83b80c..23219fc3b28c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -42,48 +42,6 @@
> #include <linux/pci-p2pdma.h>
> #include <linux/pm_runtime.h>
>
> -static int
> -__dma_resv_make_exclusive(struct dma_resv *obj)
> -{
> - struct dma_fence **fences;
> - unsigned int count;
> - int r;
> -
> - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
> - return 0;
> -
> - r = dma_resv_get_fences(obj, NULL, &count, &fences);
> - if (r)
> - return r;
> -
> - if (count == 0) {
> - /* Now that was unexpected. */
> - } else if (count == 1) {
> - dma_resv_add_excl_fence(obj, fences[0]);
> - dma_fence_put(fences[0]);
> - kfree(fences);
> - } else {
> - struct dma_fence_array *array;
> -
> - array = dma_fence_array_create(count, fences,
> - dma_fence_context_alloc(1), 0,
> - false);
> - if (!array)
> - goto err_fences_put;
> -
> - dma_resv_add_excl_fence(obj, &array->base);
> - dma_fence_put(&array->base);
> - }
> -
> - return 0;
> -
> -err_fences_put:
> - while (count--)
> - dma_fence_put(fences[count]);
> - kfree(fences);
> - return -ENOMEM;
> -}
> -
> /**
> * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
> *
> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
> if (r < 0)
> goto out;
>
> - r = amdgpu_bo_reserve(bo, false);
> - if (unlikely(r != 0))
> - goto out;
> -
> - /*
> - * We only create shared fences for internal use, but importers
> - * of the dmabuf rely on exclusive fences for implicitly
> - * tracking write hazards. As any of the current fences may
> - * correspond to a write, we need to convert all existing
> - * fences on the reservation object into a single exclusive
> - * fence.
> - */
> - r = __dma_resv_make_exclusive(bo->tbo.base.resv);
> - if (r)
> - goto out;
> -
> - bo->prime_shared_count++;
> - amdgpu_bo_unreserve(bo);
> return 0;
>
> out:
> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>
> - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
> - bo->prime_shared_count--;
> -
> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
> }
> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
> bo = gem_to_amdgpu_bo(gobj);
> bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
> bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
> - bo->prime_shared_count = 1;
>
> dma_resv_unlock(resv);
> return gobj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9cf4beaf646c..0780e8d18992 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
> break;
> }
> case AMDGPU_GEM_OP_SET_PLACEMENT:
> - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
> + if (robj->tbo.base.import_attach &&
> + args->value & AMDGPU_GEM_DOMAIN_VRAM) {
> r = -EINVAL;
> amdgpu_bo_unreserve(robj);
> break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index b7a2070d90af..d13490975ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
> return -EINVAL;
>
> /* A shared bo cannot be migrated to VRAM */
> - if (bo->prime_shared_count || bo->tbo.base.import_attach) {
> + if (bo->tbo.base.import_attach) {
> if (domain & AMDGPU_GEM_DOMAIN_GTT)
> domain = AMDGPU_GEM_DOMAIN_GTT;
> else
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 126df03a7066..c03dfd298f45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -99,7 +99,6 @@ struct amdgpu_bo {
> struct ttm_buffer_object tbo;
> struct ttm_bo_kmap_obj kmap;
> u64 flags;
> - unsigned prime_shared_count;
> /* per VM structure for page tables and with virtual addresses */
> struct amdgpu_vm_bo_base *vm_bo;
> /* Constant after initialization */
> --
> 2.25.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
2021-06-17 21:09 ` Alex Deucher
@ 2021-06-22 9:20 ` Christian König
-1 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-22 9:20 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx list, Maling list - DRI developers
Am 17.06.21 um 23:09 schrieb Alex Deucher:
> On Mon, Jun 14, 2021 at 1:45 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Drop the workaround and instead implement a better solution.
>>
>> Basically we are now chaining all submissions using a dma_fence_chain
>> container and adding them as exclusive fence to the dma_resv object.
>>
>> This way other drivers can still sync to the single exclusive fence
>> while amdgpu only sync to fences from different processes.
>>
>> v3: add the shared fence first before the exclusive one
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Series is:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
FYI I've pushed this to drm-misc-next to avoid re-base problems.
Will probably not go upstream before 5.15, so we have plenty of time to
test this.
Christian.
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
>> 6 files changed, 55 insertions(+), 79 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> index a130e766cbdb..c905a4cfc173 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>> struct amdgpu_bo_list_entry {
>> struct ttm_validate_buffer tv;
>> struct amdgpu_bo_va *bo_va;
>> + struct dma_fence_chain *chain;
>> uint32_t priority;
>> struct page **user_pages;
>> bool user_invalidated;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9ce649a1a8d3..25655414e9c0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> goto out;
>> }
>>
>> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> +
>> + e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> +
>> + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
>> + e->chain = dma_fence_chain_alloc();
>> + if (!e->chain) {
>> + r = -ENOMEM;
>> + goto error_validate;
>> + }
>> + }
>> + }
>> +
>> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>> &p->bytes_moved_vis_threshold);
>> p->bytes_moved = 0;
>> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> gws = p->bo_list->gws_obj;
>> oa = p->bo_list->oa_obj;
>>
>> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> -
>> - /* Make sure we use the exclusive slot for shared BOs */
>> - if (bo->prime_shared_count)
>> - e->tv.num_shared = 0;
>> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> - }
>> -
>> if (gds) {
>> p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>> p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
>> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> }
>>
>> error_validate:
>> - if (r)
>> + if (r) {
>> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> + dma_fence_chain_free(e->chain);
>> + e->chain = NULL;
>> + }
>> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>> + }
>> out:
>> return r;
>> }
>> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>> {
>> unsigned i;
>>
>> - if (error && backoff)
>> + if (error && backoff) {
>> + struct amdgpu_bo_list_entry *e;
>> +
>> + amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
>> + dma_fence_chain_free(e->chain);
>> + e->chain = NULL;
>> + }
>> +
>> ttm_eu_backoff_reservation(&parser->ticket,
>> &parser->validated);
>> + }
>>
>> for (i = 0; i < parser->num_post_deps; i++) {
>> drm_syncobj_put(parser->post_deps[i].syncobj);
>> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>
>> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>>
>> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> + struct dma_resv *resv = e->tv.bo->base.resv;
>> + struct dma_fence_chain *chain = e->chain;
>> +
>> + if (!chain)
>> + continue;
>> +
>> + /*
>> + * Work around dma_resv shortcommings by wrapping up the
>> + * submission in a dma_fence_chain and add it as exclusive
>> + * fence, but first add the submission as shared fence to make
>> + * sure that shared fences never signal before the exclusive
>> + * one.
>> + */
>> + dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
>> + dma_fence_get(p->fence), 1);
>> +
>> + dma_resv_add_shared_fence(resv, p->fence);
>> + rcu_assign_pointer(resv->fence_excl, &chain->base);
>> + e->chain = NULL;
>> + }
>> +
>> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>> mutex_unlock(&p->adev->notifier_lock);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> index c3053b83b80c..23219fc3b28c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> @@ -42,48 +42,6 @@
>> #include <linux/pci-p2pdma.h>
>> #include <linux/pm_runtime.h>
>>
>> -static int
>> -__dma_resv_make_exclusive(struct dma_resv *obj)
>> -{
>> - struct dma_fence **fences;
>> - unsigned int count;
>> - int r;
>> -
>> - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
>> - return 0;
>> -
>> - r = dma_resv_get_fences(obj, NULL, &count, &fences);
>> - if (r)
>> - return r;
>> -
>> - if (count == 0) {
>> - /* Now that was unexpected. */
>> - } else if (count == 1) {
>> - dma_resv_add_excl_fence(obj, fences[0]);
>> - dma_fence_put(fences[0]);
>> - kfree(fences);
>> - } else {
>> - struct dma_fence_array *array;
>> -
>> - array = dma_fence_array_create(count, fences,
>> - dma_fence_context_alloc(1), 0,
>> - false);
>> - if (!array)
>> - goto err_fences_put;
>> -
>> - dma_resv_add_excl_fence(obj, &array->base);
>> - dma_fence_put(&array->base);
>> - }
>> -
>> - return 0;
>> -
>> -err_fences_put:
>> - while (count--)
>> - dma_fence_put(fences[count]);
>> - kfree(fences);
>> - return -ENOMEM;
>> -}
>> -
>> /**
>> * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>> *
>> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>> if (r < 0)
>> goto out;
>>
>> - r = amdgpu_bo_reserve(bo, false);
>> - if (unlikely(r != 0))
>> - goto out;
>> -
>> - /*
>> - * We only create shared fences for internal use, but importers
>> - * of the dmabuf rely on exclusive fences for implicitly
>> - * tracking write hazards. As any of the current fences may
>> - * correspond to a write, we need to convert all existing
>> - * fences on the reservation object into a single exclusive
>> - * fence.
>> - */
>> - r = __dma_resv_make_exclusive(bo->tbo.base.resv);
>> - if (r)
>> - goto out;
>> -
>> - bo->prime_shared_count++;
>> - amdgpu_bo_unreserve(bo);
>> return 0;
>>
>> out:
>> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>
>> - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
>> - bo->prime_shared_count--;
>> -
>> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> }
>> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>> bo = gem_to_amdgpu_bo(gobj);
>> bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>> bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
>> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
>> - bo->prime_shared_count = 1;
>>
>> dma_resv_unlock(resv);
>> return gobj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 9cf4beaf646c..0780e8d18992 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>> break;
>> }
>> case AMDGPU_GEM_OP_SET_PLACEMENT:
>> - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
>> + if (robj->tbo.base.import_attach &&
>> + args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>> r = -EINVAL;
>> amdgpu_bo_unreserve(robj);
>> break;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index b7a2070d90af..d13490975ac3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>> return -EINVAL;
>>
>> /* A shared bo cannot be migrated to VRAM */
>> - if (bo->prime_shared_count || bo->tbo.base.import_attach) {
>> + if (bo->tbo.base.import_attach) {
>> if (domain & AMDGPU_GEM_DOMAIN_GTT)
>> domain = AMDGPU_GEM_DOMAIN_GTT;
>> else
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 126df03a7066..c03dfd298f45 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -99,7 +99,6 @@ struct amdgpu_bo {
>> struct ttm_buffer_object tbo;
>> struct ttm_bo_kmap_obj kmap;
>> u64 flags;
>> - unsigned prime_shared_count;
>> /* per VM structure for page tables and with virtual addresses */
>> struct amdgpu_vm_bo_base *vm_bo;
>> /* Constant after initialization */
>> --
>> 2.25.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3
@ 2021-06-22 9:20 ` Christian König
0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-06-22 9:20 UTC (permalink / raw)
To: Alex Deucher; +Cc: amd-gfx list, Maling list - DRI developers, Daniel Vetter
Am 17.06.21 um 23:09 schrieb Alex Deucher:
> On Mon, Jun 14, 2021 at 1:45 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Drop the workaround and instead implement a better solution.
>>
>> Basically we are now chaining all submissions using a dma_fence_chain
>> container and adding them as exclusive fence to the dma_resv object.
>>
>> This way other drivers can still sync to the single exclusive fence
>> while amdgpu only sync to fences from different processes.
>>
>> v3: add the shared fence first before the exclusive one
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Series is:
> Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
FYI I've pushed this to drm-misc-next to avoid re-base problems.
Will probably not go upstream before 5.15, so we have plenty of time to
test this.
Christian.
>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 62 ++++++++++++++++----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ---------------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 -
>> 6 files changed, 55 insertions(+), 79 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> index a130e766cbdb..c905a4cfc173 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> @@ -34,6 +34,7 @@ struct amdgpu_fpriv;
>> struct amdgpu_bo_list_entry {
>> struct ttm_validate_buffer tv;
>> struct amdgpu_bo_va *bo_va;
>> + struct dma_fence_chain *chain;
>> uint32_t priority;
>> struct page **user_pages;
>> bool user_invalidated;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9ce649a1a8d3..25655414e9c0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> goto out;
>> }
>>
>> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> +
>> + e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> +
>> + if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
>> + e->chain = dma_fence_chain_alloc();
>> + if (!e->chain) {
>> + r = -ENOMEM;
>> + goto error_validate;
>> + }
>> + }
>> + }
>> +
>> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>> &p->bytes_moved_vis_threshold);
>> p->bytes_moved = 0;
>> @@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> gws = p->bo_list->gws_obj;
>> oa = p->bo_list->oa_obj;
>>
>> - amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> -
>> - /* Make sure we use the exclusive slot for shared BOs */
>> - if (bo->prime_shared_count)
>> - e->tv.num_shared = 0;
>> - e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> - }
>> -
>> if (gds) {
>> p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
>> p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
>> @@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>> }
>>
>> error_validate:
>> - if (r)
>> + if (r) {
>> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> + dma_fence_chain_free(e->chain);
>> + e->chain = NULL;
>> + }
>> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>> + }
>> out:
>> return r;
>> }
>> @@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>> {
>> unsigned i;
>>
>> - if (error && backoff)
>> + if (error && backoff) {
>> + struct amdgpu_bo_list_entry *e;
>> +
>> + amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
>> + dma_fence_chain_free(e->chain);
>> + e->chain = NULL;
>> + }
>> +
>> ttm_eu_backoff_reservation(&parser->ticket,
>> &parser->validated);
>> + }
>>
>> for (i = 0; i < parser->num_post_deps; i++) {
>> drm_syncobj_put(parser->post_deps[i].syncobj);
>> @@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>
>> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>>
>> + amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> + struct dma_resv *resv = e->tv.bo->base.resv;
>> + struct dma_fence_chain *chain = e->chain;
>> +
>> + if (!chain)
>> + continue;
>> +
>> + /*
>> + * Work around dma_resv shortcommings by wrapping up the
>> + * submission in a dma_fence_chain and add it as exclusive
>> + * fence, but first add the submission as shared fence to make
>> + * sure that shared fences never signal before the exclusive
>> + * one.
>> + */
>> + dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
>> + dma_fence_get(p->fence), 1);
>> +
>> + dma_resv_add_shared_fence(resv, p->fence);
>> + rcu_assign_pointer(resv->fence_excl, &chain->base);
>> + e->chain = NULL;
>> + }
>> +
>> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>> mutex_unlock(&p->adev->notifier_lock);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> index c3053b83b80c..23219fc3b28c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
>> @@ -42,48 +42,6 @@
>> #include <linux/pci-p2pdma.h>
>> #include <linux/pm_runtime.h>
>>
>> -static int
>> -__dma_resv_make_exclusive(struct dma_resv *obj)
>> -{
>> - struct dma_fence **fences;
>> - unsigned int count;
>> - int r;
>> -
>> - if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
>> - return 0;
>> -
>> - r = dma_resv_get_fences(obj, NULL, &count, &fences);
>> - if (r)
>> - return r;
>> -
>> - if (count == 0) {
>> - /* Now that was unexpected. */
>> - } else if (count == 1) {
>> - dma_resv_add_excl_fence(obj, fences[0]);
>> - dma_fence_put(fences[0]);
>> - kfree(fences);
>> - } else {
>> - struct dma_fence_array *array;
>> -
>> - array = dma_fence_array_create(count, fences,
>> - dma_fence_context_alloc(1), 0,
>> - false);
>> - if (!array)
>> - goto err_fences_put;
>> -
>> - dma_resv_add_excl_fence(obj, &array->base);
>> - dma_fence_put(&array->base);
>> - }
>> -
>> - return 0;
>> -
>> -err_fences_put:
>> - while (count--)
>> - dma_fence_put(fences[count]);
>> - kfree(fences);
>> - return -ENOMEM;
>> -}
>> -
>> /**
>> * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
>> *
>> @@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
>> if (r < 0)
>> goto out;
>>
>> - r = amdgpu_bo_reserve(bo, false);
>> - if (unlikely(r != 0))
>> - goto out;
>> -
>> - /*
>> - * We only create shared fences for internal use, but importers
>> - * of the dmabuf rely on exclusive fences for implicitly
>> - * tracking write hazards. As any of the current fences may
>> - * correspond to a write, we need to convert all existing
>> - * fences on the reservation object into a single exclusive
>> - * fence.
>> - */
>> - r = __dma_resv_make_exclusive(bo->tbo.base.resv);
>> - if (r)
>> - goto out;
>> -
>> - bo->prime_shared_count++;
>> - amdgpu_bo_unreserve(bo);
>> return 0;
>>
>> out:
>> @@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
>> struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>> struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>>
>> - if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
>> - bo->prime_shared_count--;
>> -
>> pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
>> pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
>> }
>> @@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
>> bo = gem_to_amdgpu_bo(gobj);
>> bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
>> bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
>> - if (dma_buf->ops != &amdgpu_dmabuf_ops)
>> - bo->prime_shared_count = 1;
>>
>> dma_resv_unlock(resv);
>> return gobj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> index 9cf4beaf646c..0780e8d18992 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>> @@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
>> break;
>> }
>> case AMDGPU_GEM_OP_SET_PLACEMENT:
>> - if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
>> + if (robj->tbo.base.import_attach &&
>> + args->value & AMDGPU_GEM_DOMAIN_VRAM) {
>> r = -EINVAL;
>> amdgpu_bo_unreserve(robj);
>> break;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index b7a2070d90af..d13490975ac3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
>> return -EINVAL;
>>
>> /* A shared bo cannot be migrated to VRAM */
>> - if (bo->prime_shared_count || bo->tbo.base.import_attach) {
>> + if (bo->tbo.base.import_attach) {
>> if (domain & AMDGPU_GEM_DOMAIN_GTT)
>> domain = AMDGPU_GEM_DOMAIN_GTT;
>> else
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 126df03a7066..c03dfd298f45 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -99,7 +99,6 @@ struct amdgpu_bo {
>> struct ttm_buffer_object tbo;
>> struct ttm_bo_kmap_obj kmap;
>> u64 flags;
>> - unsigned prime_shared_count;
>> /* per VM structure for page tables and with virtual addresses */
>> struct amdgpu_vm_bo_base *vm_bo;
>> /* Constant after initialization */
>> --
>> 2.25.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2021-06-22 9:20 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-14 17:45 [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence Christian König
2021-06-14 17:45 ` Christian König
2021-06-14 17:45 ` [PATCH 2/2] drm/amdgpu: rework dma_resv handling v3 Christian König
2021-06-14 17:45 ` Christian König
2021-06-17 19:37 ` Daniel Vetter
2021-06-17 19:37 ` Daniel Vetter
2021-06-17 21:09 ` Alex Deucher
2021-06-17 21:09 ` Alex Deucher
2021-06-22 9:20 ` Christian König
2021-06-22 9:20 ` Christian König
2021-06-17 7:44 ` [PATCH 1/2] drm/amdgpu: unwrap fence chains in the explicit sync fence Christian König
2021-06-17 7:44 ` Christian König
2021-06-17 17:30 ` Daniel Vetter
2021-06-17 17:30 ` Daniel Vetter
2021-06-17 19:38 ` Daniel Vetter
2021-06-17 19:38 ` Daniel Vetter
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.