* [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2
@ 2022-03-21 13:58 Christian König
  2022-03-21 13:58 ` [PATCH 02/23] dma-buf: finally make the dma_resv_list private v2 Christian König
                   ` (24 more replies)
  0 siblings, 25 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

This function allows replacing fences in the shared fence list when we
can guarantee that the operation represented by the original fence has
either finished or no longer accesses the resources protected by the
dma_resv object by the time the new fence finishes.

Then use this function in the amdkfd code when BOs are unmapped from the
process.

v2: add an example of when this is useful.
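
For illustration, a minimal sketch of the intended call sequence,
mirroring the amdkfd change below (resv and ef stand in for the BO's
reservation object and the eviction fence; the reservation object must
be held via dma_resv_lock(), error handling dropped):

	struct dma_fence *stub = dma_fence_get_stub();

	/* Fences from ef's context no longer access the BO once they are
	 * considered done, so they can be replaced with the already
	 * signaled stub fence.
	 */
	dma_resv_replace_fences(resv, ef->base.context, stub);
	dma_fence_put(stub);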

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c                    | 45 +++++++++++++++++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++----------------
 include/linux/dma-resv.h                      |  2 +
 3 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index b51416405e86..509060861cf3 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -289,6 +289,51 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_resv_add_shared_fence);
 
+/**
+ * dma_resv_replace_fences - replace fences in the dma_resv obj
+ * @obj: the reservation object
+ * @context: the context of the fences to replace
+ * @replacement: the new fence to use instead
+ *
+ * Replace fences with a specified context with a new fence. Only valid if the
+ * operation represented by the original fence no longer has access to the
+ * resources represented by the dma_resv object when the new fence completes.
+ *
+ * An example of using this is replacing a preemption fence with a page table
+ * update fence which makes the resource inaccessible.
+ */
+void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
+			     struct dma_fence *replacement)
+{
+	struct dma_resv_list *list;
+	struct dma_fence *old;
+	unsigned int i;
+
+	dma_resv_assert_held(obj);
+
+	write_seqcount_begin(&obj->seq);
+
+	old = dma_resv_excl_fence(obj);
+	if (old->context == context) {
+		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
+		dma_fence_put(old);
+	}
+
+	list = dma_resv_shared_list(obj);
+	for (i = 0; list && i < list->shared_count; ++i) {
+		old = rcu_dereference_protected(list->shared[i],
+						dma_resv_held(obj));
+		if (old->context != context)
+			continue;
+
+		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
+		dma_fence_put(old);
+	}
+
+	write_seqcount_end(&obj->seq);
+}
+EXPORT_SYMBOL(dma_resv_replace_fences);
+
 /**
  * dma_resv_add_excl_fence - Add an exclusive fence.
  * @obj: the reservation object
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f9bab963a948..b6f266f612ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
 static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 					struct amdgpu_amdkfd_fence *ef)
 {
-	struct dma_resv *resv = bo->tbo.base.resv;
-	struct dma_resv_list *old, *new;
-	unsigned int i, j, k;
+	struct dma_fence *replacement;
 
 	if (!ef)
 		return -EINVAL;
 
-	old = dma_resv_shared_list(resv);
-	if (!old)
-		return 0;
-
-	new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
-	if (!new)
-		return -ENOMEM;
-
-	/* Go through all the shared fences in the resevation object and sort
-	 * the interesting ones to the end of the list.
+	/* TODO: Instead of blocking here we should use the fence of the page
+	 * table update and TLB flush directly.
 	 */
-	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
-		struct dma_fence *f;
-
-		f = rcu_dereference_protected(old->shared[i],
-					      dma_resv_held(resv));
-
-		if (f->context == ef->base.context)
-			RCU_INIT_POINTER(new->shared[--j], f);
-		else
-			RCU_INIT_POINTER(new->shared[k++], f);
-	}
-	new->shared_max = old->shared_max;
-	new->shared_count = k;
-
-	/* Install the new fence list, seqcount provides the barriers */
-	write_seqcount_begin(&resv->seq);
-	RCU_INIT_POINTER(resv->fence, new);
-	write_seqcount_end(&resv->seq);
-
-	/* Drop the references to the removed fences or move them to ef_list */
-	for (i = j; i < old->shared_count; ++i) {
-		struct dma_fence *f;
-
-		f = rcu_dereference_protected(new->shared[i],
-					      dma_resv_held(resv));
-		dma_fence_put(f);
-	}
-	kfree_rcu(old, rcu);
-
+	replacement = dma_fence_get_stub();
+	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
+				replacement);
+	dma_fence_put(replacement);
 	return 0;
 }
 
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index afdfdfac729f..3f53177bdb46 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -468,6 +468,8 @@ void dma_resv_init(struct dma_resv *obj);
 void dma_resv_fini(struct dma_resv *obj);
 int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
 void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
+void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
+			     struct dma_fence *fence);
 void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
 int dma_resv_get_fences(struct dma_resv *obj, bool write,
 			unsigned int *num_fences, struct dma_fence ***fences);
-- 
2.25.1


* [PATCH 02/23] dma-buf: finally make the dma_resv_list private v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
                   ` (23 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Drivers should never touch this directly.

v2: drop kerneldoc for the now internal handling
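
With the structure now private, code that used to walk the list by
hand is expected to go through the fence iterators instead; a rough
sketch (resv is a placeholder, reservation object locked):

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	/* true: visit the exclusive fence and all shared fences */
	dma_resv_for_each_fence(&cursor, resv, true, fence)
		dma_fence_wait(fence, false);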

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/dma-buf/dma-resv.c | 11 +++++++++++
 include/linux/dma-resv.h   | 26 +-------------------------
 2 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 509060861cf3..5001e9b4420a 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -56,6 +56,12 @@
 DEFINE_WD_CLASS(reservation_ww_class);
 EXPORT_SYMBOL(reservation_ww_class);
 
+struct dma_resv_list {
+	struct rcu_head rcu;
+	u32 shared_count, shared_max;
+	struct dma_fence __rcu *shared[];
+};
+
 /**
  * dma_resv_list_alloc - allocate fence list
  * @shared_max: number of fences we need space for
@@ -133,6 +139,11 @@ void dma_resv_fini(struct dma_resv *obj)
 }
 EXPORT_SYMBOL(dma_resv_fini);
 
+static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
+{
+	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
+}
+
 /**
  * dma_resv_reserve_shared - Reserve space to add shared fences to
  * a dma_resv.
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 3f53177bdb46..202cc65d0621 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -47,18 +47,7 @@
 
 extern struct ww_class reservation_ww_class;
 
-/**
- * struct dma_resv_list - a list of shared fences
- * @rcu: for internal use
- * @shared_count: table of shared fences
- * @shared_max: for growing shared fence table
- * @shared: shared fence table
- */
-struct dma_resv_list {
-	struct rcu_head rcu;
-	u32 shared_count, shared_max;
-	struct dma_fence __rcu *shared[];
-};
+struct dma_resv_list;
 
 /**
  * struct dma_resv - a reservation object manages fences for a buffer
@@ -451,19 +440,6 @@ dma_resv_excl_fence(struct dma_resv *obj)
 	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
 }
 
-/**
- * dma_resv_shared_list - get the reservation object's shared fence list
- * @obj: the reservation object
- *
- * Returns the shared fence list. Caller must either hold the objects
- * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(),
- * or one of the variants of each
- */
-static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
-{
-	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
-}
-
 void dma_resv_init(struct dma_resv *obj);
 void dma_resv_fini(struct dma_resv *obj);
 int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
-- 
2.25.1


* [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
  2022-03-21 13:58 ` [PATCH 02/23] dma-buf: finally make the dma_resv_list private v2 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-04-01  8:21   ` Christian König
  2022-03-21 13:58   ` Christian König
                   ` (22 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Add a function to simplify getting a single fence for all the fences in
the dma_resv object.

v2: fix ref leak in error handling
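
Roughly how a caller is expected to use it (sketch only; resv is a
placeholder, and *fence can come back NULL when the object currently
holds no fences):

	struct dma_fence *fence;
	int r;

	r = dma_resv_get_singleton(resv, true, &fence);
	if (r)
		return r;

	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}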

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 52 ++++++++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   |  2 ++
 2 files changed, 54 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 5001e9b4420a..c09fd8da0c85 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -34,6 +34,7 @@
  */
 
 #include <linux/dma-resv.h>
+#include <linux/dma-fence-array.h>
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/sched/mm.h>
@@ -650,6 +651,57 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write,
 }
 EXPORT_SYMBOL_GPL(dma_resv_get_fences);
 
+/**
+ * dma_resv_get_singleton - Get a single fence for all the fences
+ * @obj: the reservation object
+ * @write: true if we should return all fences
+ * @fence: the resulting fence
+ *
+ * Get a single fence representing all the fences inside the resv object.
+ * Returns either 0 for success or -ENOMEM.
+ *
+ * Warning: The returned fence must not be added back to the resv object
+ * itself, since that can lead to stack corruption when finalizing the
+ * dma_fence_array.
+ */
+int dma_resv_get_singleton(struct dma_resv *obj, bool write,
+			   struct dma_fence **fence)
+{
+	struct dma_fence_array *array;
+	struct dma_fence **fences;
+	unsigned count;
+	int r;
+
+	r = dma_resv_get_fences(obj, write, &count, &fences);
+	if (r)
+		return r;
+
+	if (count == 0) {
+		*fence = NULL;
+		return 0;
+	}
+
+	if (count == 1) {
+		*fence = fences[0];
+		kfree(fences);
+		return 0;
+	}
+
+	array = dma_fence_array_create(count, fences,
+				       dma_fence_context_alloc(1),
+				       1, false);
+	if (!array) {
+		while (count--)
+			dma_fence_put(fences[count]);
+		kfree(fences);
+		return -ENOMEM;
+	}
+
+	*fence = &array->base;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
+
 /**
  * dma_resv_wait_timeout - Wait on reservation's objects
  * shared and/or exclusive fences.
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 202cc65d0621..08512c1e215d 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -449,6 +449,8 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
 void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
 int dma_resv_get_fences(struct dma_resv *obj, bool write,
 			unsigned int *num_fences, struct dma_fence ***fences);
+int dma_resv_get_singleton(struct dma_resv *obj, bool write,
+			   struct dma_fence **fence);
 int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
 long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
 			   unsigned long timeout);
-- 
2.25.1


* [PATCH 04/23] RDMA: use dma_resv_wait() instead of extracting the fence
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
@ 2022-03-21 13:58   ` Christian König
  2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
                     ` (23 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel
  Cc: Leon Romanovsky, Gal Pressman, linaro-mm-sig, Jason Gunthorpe,
	Maor Gottlieb, Christian König, linux-media

Use dma_resv_wait() instead of extracting the exclusive fence and
waiting on it manually.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Maor Gottlieb <maorg@nvidia.com>
Cc: Gal Pressman <galpress@amazon.com>
Cc: linux-media@vger.kernel.org
Cc: linaro-mm-sig@lists.linaro.org
---
 drivers/infiniband/core/umem_dmabuf.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index f0760741f281..d32cd7538835 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -16,7 +16,6 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
 {
 	struct sg_table *sgt;
 	struct scatterlist *sg;
-	struct dma_fence *fence;
 	unsigned long start, end, cur = 0;
 	unsigned int nmap = 0;
 	int i;
@@ -68,11 +67,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
 	 * may be not up-to-date. Wait for the exporter to finish
 	 * the migration.
 	 */
-	fence = dma_resv_excl_fence(umem_dmabuf->attach->dmabuf->resv);
-	if (fence)
-		return dma_fence_wait(fence, false);
-
-	return 0;
+	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false,
+				     false, MAX_SCHEDULE_TIMEOUT);
 }
 EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
 
-- 
2.25.1


* [PATCH 05/23] drm/etnaviv: stop using dma_resv_excl_fence
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (2 preceding siblings ...)
  2022-03-21 13:58   ` Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-23 15:58   ` Lucas Stach
  2022-03-21 13:58   ` Christian König
                   ` (20 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Russell King, Christian König, etnaviv

We can get the exclusive fence together with the shared ones from
dma_resv_get_fences() as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Russell King <linux+etnaviv@armlinux.org.uk>
Cc: Christian Gmeiner <christian.gmeiner@gmail.com>
Cc: etnaviv@lists.freedesktop.org
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.h        |  1 -
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 14 +++++---------
 drivers/gpu/drm/etnaviv/etnaviv_sched.c      | 10 ----------
 3 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index 98e60df882b6..f596d743baa3 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -80,7 +80,6 @@ struct etnaviv_gem_submit_bo {
 	u64 va;
 	struct etnaviv_gem_object *obj;
 	struct etnaviv_vram_mapping *mapping;
-	struct dma_fence *excl;
 	unsigned int nr_shared;
 	struct dma_fence **shared;
 };
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 4eb00a0cb650..385ea141c23e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -188,15 +188,11 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
 			continue;
 
-		if (bo->flags & ETNA_SUBMIT_BO_WRITE) {
-			ret = dma_resv_get_fences(robj, true, &bo->nr_shared,
-						  &bo->shared);
-			if (ret)
-				return ret;
-		} else {
-			bo->excl = dma_fence_get(dma_resv_excl_fence(robj));
-		}
-
+		ret = dma_resv_get_fences(robj,
+					  bo->flags & ETNA_SUBMIT_BO_WRITE,
+					  &bo->nr_shared, &bo->shared);
+		if (ret)
+			return ret;
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 35e5ef7dbdcc..59277dc62011 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -39,16 +39,6 @@ etnaviv_sched_dependency(struct drm_sched_job *sched_job,
 		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
 		int j;
 
-		if (bo->excl) {
-			fence = bo->excl;
-			bo->excl = NULL;
-
-			if (!dma_fence_is_signaled(fence))
-				return fence;
-
-			dma_fence_put(fence);
-		}
-
 		for (j = 0; j < bo->nr_shared; j++) {
 			if (!bo->shared[j])
 				continue;
-- 
2.25.1


* [Nouveau] [PATCH 06/23] drm/nouveau: stop using dma_resv_excl_fence
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
@ 2022-03-21 13:58   ` Christian König
  2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
                     ` (23 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: nouveau, Christian König, Ben Skeggs

Instead use the new dma_resv_get_singleton function.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Karol Herbst <kherbst@redhat.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: nouveau@lists.freedesktop.org
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index fa73fe57f97b..74f8652d2bd3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -959,7 +959,14 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
-	struct dma_fence *fence = dma_resv_excl_fence(bo->base.resv);
+	struct dma_fence *fence;
+	int ret;
+
+	/* TODO: This is actually a memory management dependency */
+	ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
+	if (ret)
+		dma_resv_wait_timeout(bo->base.resv, false, false,
+				      MAX_SCHEDULE_TIMEOUT);
 
 	nv10_bo_put_tile_region(dev, *old_tile, fence);
 	*old_tile = new_tile;
-- 
2.25.1


* [PATCH 07/23] drm/vmwgfx: stop using dma_resv_excl_fence
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (4 preceding siblings ...)
  2022-03-21 13:58   ` Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-21 14:02   ` Zack Rusin
  2022-03-21 13:58 ` [PATCH 08/23] drm/radeon: " Christian König
                   ` (18 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: VMware Graphics, Christian König

Instead use the new dma_resv_get_singleton function.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
Cc: Zack Rusin <zackr@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 708899ba2102..36c3b5db7e69 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1165,8 +1165,10 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
 		vmw_bo_fence_single(bo, NULL);
 		if (bo->moving)
 			dma_fence_put(bo->moving);
-		bo->moving = dma_fence_get
-			(dma_resv_excl_fence(bo->base.resv));
+
+		/* TODO: This is actually a memory management dependency */
+		return dma_resv_get_singleton(bo->base.resv, false,
+					      &bo->moving);
 	}
 
 	return 0;
-- 
2.25.1


* [PATCH 08/23] drm/radeon: stop using dma_resv_excl_fence
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (5 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 07/23] drm/vmwgfx: " Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-21 13:58 ` [PATCH 09/23] drm/amdgpu: use dma_resv_for_each_fence for CS workaround Christian König
                   ` (17 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König, amd-gfx

Instead use the new dma_resv_get_singleton function.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: amd-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/radeon/radeon_display.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index b9a07677a71e..f60e826cd292 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -533,7 +533,12 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
 		goto cleanup;
 	}
-	work->fence = dma_fence_get(dma_resv_excl_fence(new_rbo->tbo.base.resv));
+	r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence);
+	if (r) {
+		radeon_bo_unreserve(new_rbo);
+		DRM_ERROR("failed to get new rbo buffer fences\n");
+		goto cleanup;
+	}
 	radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL);
 	radeon_bo_unreserve(new_rbo);
 
-- 
2.25.1


* [PATCH 09/23] drm/amdgpu: use dma_resv_for_each_fence for CS workaround
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (6 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 08/23] drm/radeon: " Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-21 13:58 ` [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2 Christian König
                   ` (16 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König, amd-gfx

Get the write fence using dma_resv_for_each_fence instead of accessing
it manually.
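
The loop in the hunk below relies on the iterator semantics: with
all_fences set to false only the exclusive (write) fence is visited,
so breaking out of the first iteration yields exactly that fence, and
the variable ends up NULL when there is none. As a standalone sketch
(resv is a placeholder, reservation object locked):

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_for_each_fence(&cursor, resv, false, fence)
		break;
	/* fence is now the exclusive fence, or NULL */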

Signed-off-by: Christian König <christian.koenig@amd.com>
Cc: amd-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 025748e9c772..02488a824fe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1267,6 +1267,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 		struct dma_resv *resv = e->tv.bo->base.resv;
 		struct dma_fence_chain *chain = e->chain;
+		struct dma_resv_iter cursor;
+		struct dma_fence *fence;
 
 		if (!chain)
 			continue;
@@ -1276,9 +1278,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		 * submission in a dma_fence_chain and add it as exclusive
 		 * fence.
 		 */
-		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
-				     dma_fence_get(p->fence), 1);
-
+		dma_resv_for_each_fence(&cursor, resv, false, fence) {
+			break;
+		}
+		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
 		rcu_assign_pointer(resv->fence_excl, &chain->base);
 		e->chain = NULL;
 	}
-- 
2.25.1


* [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (7 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 09/23] drm/amdgpu: use dma_resv_for_each_fence for CS workaround Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-23 13:36   ` Daniel Vetter
  2022-03-21 13:58 ` [PATCH 11/23] dma-buf: drop the DAG approach for the dma_resv object v2 Christian König
                   ` (15 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Drivers should never touch this directly.

v2: fix rebase clash

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c |  6 ++++++
 include/linux/dma-resv.h   | 17 -----------------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index c09fd8da0c85..1c9af97fe904 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -140,6 +140,12 @@ void dma_resv_fini(struct dma_resv *obj)
 }
 EXPORT_SYMBOL(dma_resv_fini);
 
+static inline struct dma_fence *
+dma_resv_excl_fence(struct dma_resv *obj)
+{
+	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
+}
+
 static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
 {
 	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 08512c1e215d..20e13f36710a 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -423,23 +423,6 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
 	ww_mutex_unlock(&obj->lock);
 }
 
-/**
- * dma_resv_excl_fence - return the object's exclusive fence
- * @obj: the reservation object
- *
- * Returns the exclusive fence (if any). Caller must either hold the objects
- * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(),
- * or one of the variants of each
- *
- * RETURNS
- * The exclusive fence or NULL
- */
-static inline struct dma_fence *
-dma_resv_excl_fence(struct dma_resv *obj)
-{
-	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
-}
-
 void dma_resv_init(struct dma_resv *obj);
 void dma_resv_fini(struct dma_resv *obj);
 int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
-- 
2.25.1


* [PATCH 11/23] dma-buf: drop the DAG approach for the dma_resv object v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (8 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-23 13:40   ` Daniel Vetter
  2022-03-21 13:58 ` [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3 Christian König
                   ` (14 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

So far we had the approach of using a directed acyclic graph (DAG)
with the dma_resv object.

This turned out to have many downsides, in particular it means that
every single driver and user of this interface needs to be aware of
this restriction when adding fences. If the rules for the DAG are not
followed we end up with potentially hard to debug memory corruption,
information leaks or even huge security holes because we allow
userspace to access freed up memory.

Since we already took a step back from that by always looking at all
fences, we now go a step further and stop dropping the shared fences
when a new exclusive one is added.

v2: Drop some now superfluous documentation
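
A small sketch of the behavioral change, in the style of the
st-dma-resv selftests (resv is a placeholder, locked and with a shared
slot reserved beforehand; f1 and f2 are placeholder fences):

	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	unsigned int count = 0;

	dma_resv_add_shared_fence(resv, f1);
	dma_resv_add_excl_fence(resv, f2);

	dma_resv_for_each_fence(&cursor, resv, true, fence)
		++count;

	/* count is now 2; before this patch f1 was dropped, giving 1 */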

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 16 +---------------
 include/linux/dma-buf.h    |  7 -------
 include/linux/dma-resv.h   | 22 +++++-----------------
 3 files changed, 6 insertions(+), 39 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 1c9af97fe904..4b12141579e2 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -358,35 +358,21 @@ EXPORT_SYMBOL(dma_resv_replace_fences);
  * @fence: the exclusive fence to add
  *
  * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock().
- * Note that this function replaces all fences attached to @obj, see also
- * &dma_resv.fence_excl for a discussion of the semantics.
+ * See also &dma_resv.fence_excl for a discussion of the semantics.
  */
 void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 {
 	struct dma_fence *old_fence = dma_resv_excl_fence(obj);
-	struct dma_resv_list *old;
-	u32 i = 0;
 
 	dma_resv_assert_held(obj);
 
-	old = dma_resv_shared_list(obj);
-	if (old)
-		i = old->shared_count;
-
 	dma_fence_get(fence);
 
 	write_seqcount_begin(&obj->seq);
 	/* write_seqcount_begin provides the necessary memory barrier */
 	RCU_INIT_POINTER(obj->fence_excl, fence);
-	if (old)
-		old->shared_count = 0;
 	write_seqcount_end(&obj->seq);
 
-	/* inplace update, no shared fences */
-	while (i--)
-		dma_fence_put(rcu_dereference_protected(old->shared[i],
-						dma_resv_held(obj)));
-
 	dma_fence_put(old_fence);
 }
 EXPORT_SYMBOL(dma_resv_add_excl_fence);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 7ab50076e7a6..74083e62e19d 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -420,13 +420,6 @@ struct dma_buf {
 	 * - Dynamic importers should set fences for any access that they can't
 	 *   disable immediately from their &dma_buf_attach_ops.move_notify
 	 *   callback.
-	 *
-	 * IMPORTANT:
-	 *
-	 * All drivers must obey the struct dma_resv rules, specifically the
-	 * rules for updating fences, see &dma_resv.fence_excl and
-	 * &dma_resv.fence. If these dependency rules are broken access tracking
-	 * can be lost resulting in use after free issues.
 	 */
 	struct dma_resv *resv;
 
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 20e13f36710a..ecb697d4d861 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -93,23 +93,11 @@ struct dma_resv {
 	 *
 	 * The exclusive fence, if there is one currently.
 	 *
-	 * There are two ways to update this fence:
-	 *
-	 * - First by calling dma_resv_add_excl_fence(), which replaces all
-	 *   fences attached to the reservation object. To guarantee that no
-	 *   fences are lost, this new fence must signal only after all previous
-	 *   fences, both shared and exclusive, have signalled. In some cases it
-	 *   is convenient to achieve that by attaching a struct dma_fence_array
-	 *   with all the new and old fences.
-	 *
-	 * - Alternatively the fence can be set directly, which leaves the
-	 *   shared fences unchanged. To guarantee that no fences are lost, this
-	 *   new fence must signal only after the previous exclusive fence has
-	 *   signalled. Since the shared fences are staying intact, it is not
-	 *   necessary to maintain any ordering against those. If semantically
-	 *   only a new access is added without actually treating the previous
-	 *   one as a dependency the exclusive fences can be strung together
-	 *   using struct dma_fence_chain.
+	 * To guarantee that no fences are lost, a new exclusive fence must
+	 * signal only after the previous exclusive fence has signalled. If
+	 * semantically only a new access is added without actually treating the
+	 * previous one as a dependency, the exclusive fences can be strung
+	 * together using struct dma_fence_chain.
 	 *
 	 * Note that actual semantics of what an exclusive or shared fence mean
 	 * is defined by the user, for reservation objects shared across drivers
-- 
2.25.1


* [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (9 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 11/23] dma-buf: drop the DAG approach for the dma_resv object v2 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-28 17:14   ` Daniel Vetter
  2022-03-21 13:58 ` [PATCH 13/23] drm/atomic-helper: support more than one write fence in drm_gem_plane_helper_prepare_fb Christian König
                   ` (13 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Audit all the users of dma_resv_add_excl_fence() and make sure they
reserve a shared slot even when only trying to add an exclusive fence.

This is the next step towards handling the exclusive fence like a
shared one.

v2: fix missed case in amdgpu
v3: fix two more missed cases in radeon, rename the function
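
The rule this establishes is the same for both slots: reserve first,
then add. Schematically (sketch; resv, fence and write are
placeholders, reservation object locked):

	int ret;

	ret = dma_resv_reserve_fences(resv, 1);
	if (ret)
		return ret;

	if (write)
		dma_resv_add_excl_fence(resv, fence);
	else
		dma_resv_add_shared_fence(resv, fence);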

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c                    | 10 +--
 drivers/dma-buf/st-dma-resv.c                 | 64 +++++++++----------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  8 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  8 +--
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |  3 +-
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 10 ++-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  6 +-
 .../drm/i915/gem/selftests/i915_gem_migrate.c |  5 +-
 drivers/gpu/drm/i915/i915_vma.c               |  8 ++-
 .../drm/i915/selftests/intel_memory_region.c  |  7 ++
 drivers/gpu/drm/lima/lima_gem.c               | 10 ++-
 drivers/gpu/drm/msm/msm_gem_submit.c          | 18 +++---
 drivers/gpu/drm/nouveau/nouveau_fence.c       |  8 +--
 drivers/gpu/drm/panfrost/panfrost_job.c       |  4 ++
 drivers/gpu/drm/qxl/qxl_release.c             |  2 +-
 drivers/gpu/drm/radeon/radeon_cs.c            |  4 ++
 drivers/gpu/drm/radeon/radeon_object.c        |  8 +++
 drivers/gpu/drm/radeon/radeon_vm.c            |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c                  |  4 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c             | 12 +++-
 drivers/gpu/drm/ttm/ttm_execbuf_util.c        | 15 ++---
 drivers/gpu/drm/v3d/v3d_gem.c                 | 15 +++--
 drivers/gpu/drm/vc4/vc4_gem.c                 |  2 +-
 drivers/gpu/drm/vgem/vgem_fence.c             | 12 ++--
 drivers/gpu/drm/virtio/virtgpu_gem.c          |  9 +++
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            | 16 +++--
 include/linux/dma-resv.h                      |  4 +-
 30 files changed, 171 insertions(+), 113 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 4b12141579e2..78a32da2cb0b 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -152,7 +152,7 @@ static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
 }
 
 /**
- * dma_resv_reserve_shared - Reserve space to add shared fences to
+ * dma_resv_reserve_fences - Reserve space to add shared fences to
  * a dma_resv.
  * @obj: reservation object
  * @num_fences: number of fences we want to add
@@ -167,7 +167,7 @@ static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
  * RETURNS
  * Zero for success, or -errno
  */
-int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences)
+int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
 {
 	struct dma_resv_list *old, *new;
 	unsigned int i, j, k, max;
@@ -230,7 +230,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences)
 
 	return 0;
 }
-EXPORT_SYMBOL(dma_resv_reserve_shared);
+EXPORT_SYMBOL(dma_resv_reserve_fences);
 
 #ifdef CONFIG_DEBUG_MUTEXES
 /**
@@ -238,7 +238,7 @@ EXPORT_SYMBOL(dma_resv_reserve_shared);
  * @obj: the dma_resv object to reset
  *
  * Reset the number of pre-reserved shared slots to test that drivers do
- * correct slot allocation using dma_resv_reserve_shared(). See also
+ * correct slot allocation using dma_resv_reserve_fences(). See also
  * &dma_resv_list.shared_max.
  */
 void dma_resv_reset_shared_max(struct dma_resv *obj)
@@ -260,7 +260,7 @@ EXPORT_SYMBOL(dma_resv_reset_shared_max);
  * @fence: the shared fence to add
  *
  * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
- * dma_resv_reserve_shared() has been called.
+ * dma_resv_reserve_fences() has been called.
  *
  * See also &dma_resv.fence for a discussion of the semantics.
  */
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index cbe999c6e7a6..d2e61f6ae989 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -75,17 +75,16 @@ static int test_signaling(void *arg, bool shared)
 		goto err_free;
 	}
 
-	if (shared) {
-		r = dma_resv_reserve_shared(&resv, 1);
-		if (r) {
-			pr_err("Resv shared slot allocation failed\n");
-			goto err_unlock;
-		}
+	r = dma_resv_reserve_fences(&resv, 1);
+	if (r) {
+		pr_err("Resv shared slot allocation failed\n");
+		goto err_unlock;
+	}
 
+	if (shared)
 		dma_resv_add_shared_fence(&resv, f);
-	} else {
+	else
 		dma_resv_add_excl_fence(&resv, f);
-	}
 
 	if (dma_resv_test_signaled(&resv, shared)) {
 		pr_err("Resv unexpectedly signaled\n");
@@ -134,17 +133,16 @@ static int test_for_each(void *arg, bool shared)
 		goto err_free;
 	}
 
-	if (shared) {
-		r = dma_resv_reserve_shared(&resv, 1);
-		if (r) {
-			pr_err("Resv shared slot allocation failed\n");
-			goto err_unlock;
-		}
+	r = dma_resv_reserve_fences(&resv, 1);
+	if (r) {
+		pr_err("Resv shared slot allocation failed\n");
+		goto err_unlock;
+	}
 
+	if (shared)
 		dma_resv_add_shared_fence(&resv, f);
-	} else {
+	else
 		dma_resv_add_excl_fence(&resv, f);
-	}
 
 	r = -ENOENT;
 	dma_resv_for_each_fence(&cursor, &resv, shared, fence) {
@@ -206,18 +204,17 @@ static int test_for_each_unlocked(void *arg, bool shared)
 		goto err_free;
 	}
 
-	if (shared) {
-		r = dma_resv_reserve_shared(&resv, 1);
-		if (r) {
-			pr_err("Resv shared slot allocation failed\n");
-			dma_resv_unlock(&resv);
-			goto err_free;
-		}
+	r = dma_resv_reserve_fences(&resv, 1);
+	if (r) {
+		pr_err("Resv shared slot allocation failed\n");
+		dma_resv_unlock(&resv);
+		goto err_free;
+	}
 
+	if (shared)
 		dma_resv_add_shared_fence(&resv, f);
-	} else {
+	else
 		dma_resv_add_excl_fence(&resv, f);
-	}
 	dma_resv_unlock(&resv);
 
 	r = -ENOENT;
@@ -290,18 +287,17 @@ static int test_get_fences(void *arg, bool shared)
 		goto err_resv;
 	}
 
-	if (shared) {
-		r = dma_resv_reserve_shared(&resv, 1);
-		if (r) {
-			pr_err("Resv shared slot allocation failed\n");
-			dma_resv_unlock(&resv);
-			goto err_resv;
-		}
+	r = dma_resv_reserve_fences(&resv, 1);
+	if (r) {
+		pr_err("Resv shared slot allocation failed\n");
+		dma_resv_unlock(&resv);
+		goto err_resv;
+	}
 
+	if (shared)
 		dma_resv_add_shared_fence(&resv, f);
-	} else {
+	else
 		dma_resv_add_excl_fence(&resv, f);
-	}
 	dma_resv_unlock(&resv);
 
 	r = dma_resv_get_fences(&resv, shared, &i, &fences);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b6f266f612ea..7de8f67f7dde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1220,7 +1220,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 				  AMDGPU_FENCE_OWNER_KFD, false);
 	if (ret)
 		goto wait_pd_fail;
-	ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1);
+	ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
 	if (ret)
 		goto reserve_shared_fail;
 	amdgpu_bo_fence(vm->root.bo,
@@ -2530,7 +2530,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
 	 * Add process eviction fence to bo so they can
 	 * evict each other.
 	 */
-	ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
+	ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
 	if (ret)
 		goto reserve_shared_fail;
 	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index ea0cde4904f0..2f808decd8d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1384,6 +1384,14 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared)
 {
 	struct dma_resv *resv = bo->tbo.base.resv;
+	int r;
+
+	r = dma_resv_reserve_fences(resv, 1);
+	if (r) {
+		/* As last resort on OOM we block for the fence */
+		dma_fence_wait(fence, false);
+		return;
+	}
 
 	if (shared)
 		dma_resv_add_shared_fence(resv, fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b37fc7d7d2c7..0ab85280e8ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2968,7 +2968,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r)
 		goto error_free_root;
 
-	r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1);
+	r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
 	if (r)
 		goto error_unreserve;
 
@@ -3411,7 +3411,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 		value = 0;
 	}
 
-	r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+	r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
 	if (r) {
 		pr_debug("failed %d to reserve fence slot\n", r);
 		goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f2805ba74c80..7b878221f1d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -543,7 +543,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 		goto reserve_bo_failed;
 	}
 
-	r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+	r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
 	if (r) {
 		pr_debug("failed %d to reserve bo\n", r);
 		amdgpu_bo_unreserve(bo);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 385ea141c23e..b808ddb9da48 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -179,11 +179,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
 		struct dma_resv *robj = bo->obj->base.resv;
 
-		if (!(bo->flags & ETNA_SUBMIT_BO_WRITE)) {
-			ret = dma_resv_reserve_shared(robj, 1);
-			if (ret)
-				return ret;
-		}
+		ret = dma_resv_reserve_fences(robj, 1);
+		if (ret)
+			return ret;
 
 		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
 			continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8a248003dfae..8a2223eb0ba9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -106,7 +106,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	trace_i915_gem_object_clflush(obj);
 
 	clflush = NULL;
-	if (!(flags & I915_CLFLUSH_SYNC))
+	if (!(flags & I915_CLFLUSH_SYNC) &&
+	    dma_resv_reserve_fences(obj->base.resv, 1) == 0)
 		clflush = clflush_work_create(obj);
 	if (clflush) {
 		i915_sw_fence_await_reservation(&clflush->base.chain,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3a5b247be738..e8eb6ee83f24 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -994,11 +994,9 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
 			}
 		}
 
-		if (!(ev->flags & EXEC_OBJECT_WRITE)) {
-			err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
-			if (err)
-				return err;
-		}
+		err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
+		if (err)
+			return err;
 
 		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
 			   eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
@@ -2297,7 +2295,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 		goto err_trampoline;
 	}
 
-	err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
+	err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
 	if (err)
 		goto err_trampoline;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index ee9612a3ee5e..4de6500f3c55 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -596,7 +596,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 	assert_object_held(src);
 	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 
-	ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
+	ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
+	if (ret)
+		return ret;
+
+	ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index ecb691c81d1e..f9b369ed4b50 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -180,7 +180,10 @@ static int igt_lmem_pages_migrate(void *arg)
 					  i915_gem_object_is_lmem(obj),
 					  0xdeadbeaf, &rq);
 		if (rq) {
-			dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
+			err = dma_resv_reserve_fences(obj->base.resv, 1);
+			if (!err)
+				dma_resv_add_excl_fence(obj->base.resv,
+							&rq->fence);
 			i915_request_put(rq);
 		}
 		if (err)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index c0d6d5526abe..fe9f89289418 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1633,6 +1633,12 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 			intel_frontbuffer_put(front);
 		}
 
+		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
+			err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
+			if (unlikely(err))
+				return err;
+		}
+
 		if (fence) {
 			dma_resv_add_excl_fence(vma->obj->base.resv, fence);
 			obj->write_domain = I915_GEM_DOMAIN_RENDER;
@@ -1640,7 +1646,7 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 		}
 	} else {
 		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
-			err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
+			err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
 			if (unlikely(err))
 				return err;
 		}
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 7acba1d2135e..7f40502b2c1e 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -895,6 +895,13 @@ static int igt_lmem_write_cpu(void *arg)
 	}
 
 	i915_gem_object_lock(obj, NULL);
+
+	err = dma_resv_reserve_fences(obj->base.resv, 1);
+	if (err) {
+		i915_gem_object_unlock(obj);
+		goto out_put;
+	}
+
 	/* Put the pages into a known state -- from the gpu for added fun */
 	intel_engine_pm_get(engine);
 	err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
index 6a6f6f2ead75..9435a3ca71c8 100644
--- a/drivers/gpu/drm/lima/lima_gem.c
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -256,13 +256,11 @@ int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
 static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
 			    bool write, bool explicit)
 {
-	int err = 0;
+	int err;
 
-	if (!write) {
-		err = dma_resv_reserve_shared(lima_bo_resv(bo), 1);
-		if (err)
-			return err;
-	}
+	err = dma_resv_reserve_fences(lima_bo_resv(bo), 1);
+	if (err)
+		return err;
 
 	/* explicit sync use user passed dep fence */
 	if (explicit)
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 6cfa984dee6a..993dbcd7a586 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -320,16 +320,14 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
 		struct drm_gem_object *obj = &submit->bos[i].obj->base;
 		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
 
-		if (!write) {
-			/* NOTE: _reserve_shared() must happen before
-			 * _add_shared_fence(), which makes this a slightly
-			 * strange place to call it.  OTOH this is a
-			 * convenient can-fail point to hook it in.
-			 */
-			ret = dma_resv_reserve_shared(obj->resv, 1);
-			if (ret)
-				return ret;
-		}
+		/* NOTE: _reserve_fences() must happen before
+		 * _add_shared_fence(), which makes this a slightly
+		 * strange place to call it.  OTOH this is a
+		 * convenient can-fail point to hook it in.
+		 */
+		ret = dma_resv_reserve_fences(obj->resv, 1);
+		if (ret)
+			return ret;
 
 		/* exclusive fences must be ordered */
 		if (no_implicit && !write)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index a3a04e0d76ec..0268259e97eb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -346,11 +346,9 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
 	struct dma_resv *resv = nvbo->bo.base.resv;
 	int i, ret;
 
-	if (!exclusive) {
-		ret = dma_resv_reserve_shared(resv, 1);
-		if (ret)
-			return ret;
-	}
+	ret = dma_resv_reserve_fences(resv, 1);
+	if (ret)
+		return ret;
 
 	/* Waiting for the exclusive fence first causes performance regressions
 	 * under some circumstances. So manually wait for the shared ones first.
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index a6925dbb6224..c34114560e49 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -247,6 +247,10 @@ static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
 	int i, ret;
 
 	for (i = 0; i < bo_count; i++) {
+		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
+		if (ret)
+			return ret;
+
 		/* panfrost always uses write mode in its current uapi */
 		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
 							      true);
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 469979cd0341..cde1e8ddaeaa 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -200,7 +200,7 @@ static int qxl_release_validate_bo(struct qxl_bo *bo)
 			return ret;
 	}
 
-	ret = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
+	ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 9ed2b2700e0a..446f7bae54c4 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -535,6 +535,10 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
 			return r;
 
 		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
+
+		r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+		if (r)
+			return r;
 	}
 
 	return radeon_vm_clear_invalids(rdev, vm);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index b827b87aefe2..afca4bf59a8d 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -783,6 +783,14 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
 		     bool shared)
 {
 	struct dma_resv *resv = bo->tbo.base.resv;
+	int r;
+
+	r = dma_resv_reserve_fences(resv, 1);
+	if (r) {
+		/* As last resort on OOM we block for the fence */
+		dma_fence_wait(&fence->base, false);
+		return;
+	}
 
 	if (shared)
 		dma_resv_add_shared_fence(resv, &fence->base);
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index bb53016f3138..987cabbf1318 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -831,7 +831,7 @@ static int radeon_vm_update_ptes(struct radeon_device *rdev,
 		int r;
 
 		radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
-		r = dma_resv_reserve_shared(pt->tbo.base.resv, 1);
+		r = dma_resv_reserve_fences(pt->tbo.base.resv, 1);
 		if (r)
 			return r;
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index db3dc7ef5382..1dd6f13bb03c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 
 	dma_resv_add_shared_fence(bo->base.resv, fence);
 
-	ret = dma_resv_reserve_shared(bo->base.resv, 1);
+	ret = dma_resv_reserve_fences(bo->base.resv, 1);
 	if (unlikely(ret)) {
 		dma_fence_put(fence);
 		return ret;
@@ -821,7 +821,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	bool type_found = false;
 	int i, ret;
 
-	ret = dma_resv_reserve_shared(bo->base.resv, 1);
+	ret = dma_resv_reserve_fences(bo->base.resv, 1);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 544a84fa6589..862d2f22412a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -221,9 +221,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 
 	fbo->base = *bo;
 
-	ttm_bo_get(bo);
-	fbo->bo = bo;
-
 	/**
 	 * Fix up members that we shouldn't copy directly:
 	 * TODO: Explicit member copy would probably be better here.
@@ -251,6 +248,15 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	ret = dma_resv_trylock(&fbo->base.base._resv);
 	WARN_ON(!ret);
 
+	ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
+	if (ret) {
+		kfree(fbo);
+		return ret;
+	}
+
+	ttm_bo_get(bo);
+	fbo->bo = bo;
+
 	ttm_bo_move_to_lru_tail_unlocked(&fbo->base);
 
 	*new_obj = &fbo->base;
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 071c48d672c6..789c645f004e 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -90,6 +90,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
+		unsigned int num_fences;
 
 		ret = ttm_bo_reserve(bo, intr, (ticket == NULL), ticket);
 		if (ret == -EALREADY && dups) {
@@ -100,12 +101,10 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 			continue;
 		}
 
+		num_fences = max(entry->num_shared, 1u);
 		if (!ret) {
-			if (!entry->num_shared)
-				continue;
-
-			ret = dma_resv_reserve_shared(bo->base.resv,
-								entry->num_shared);
+			ret = dma_resv_reserve_fences(bo->base.resv,
+						      num_fences);
 			if (!ret)
 				continue;
 		}
@@ -120,9 +119,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 			ret = ttm_bo_reserve_slowpath(bo, intr, ticket);
 		}
 
-		if (!ret && entry->num_shared)
-			ret = dma_resv_reserve_shared(bo->base.resv,
-								entry->num_shared);
+		if (!ret)
+			ret = dma_resv_reserve_fences(bo->base.resv,
+						      num_fences);
 
 		if (unlikely(ret != 0)) {
 			if (ticket) {
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 92bc0faee84f..961812d33827 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -259,16 +259,21 @@ v3d_lock_bo_reservations(struct v3d_job *job,
 		return ret;
 
 	for (i = 0; i < job->bo_count; i++) {
+		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
+		if (ret)
+			goto fail;
+
 		ret = drm_sched_job_add_implicit_dependencies(&job->base,
 							      job->bo[i], true);
-		if (ret) {
-			drm_gem_unlock_reservations(job->bo, job->bo_count,
-						    acquire_ctx);
-			return ret;
-		}
+		if (ret)
+			goto fail;
 	}
 
 	return 0;
+
+fail:
+	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 4abf10b66fe8..594bd6bb00d2 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -644,7 +644,7 @@ vc4_lock_bo_reservations(struct drm_device *dev,
 	for (i = 0; i < exec->bo_count; i++) {
 		bo = &exec->bo[i]->base;
 
-		ret = dma_resv_reserve_shared(bo->resv, 1);
+		ret = dma_resv_reserve_fences(bo->resv, 1);
 		if (ret) {
 			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
 			return ret;
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
index bd6f75285fd9..2ddbebca87d9 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -157,12 +157,14 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
 	}
 
 	/* Expose the fence via the dma-buf */
-	ret = 0;
 	dma_resv_lock(resv, NULL);
-	if (arg->flags & VGEM_FENCE_WRITE)
-		dma_resv_add_excl_fence(resv, fence);
-	else if ((ret = dma_resv_reserve_shared(resv, 1)) == 0)
-		dma_resv_add_shared_fence(resv, fence);
+	ret = dma_resv_reserve_fences(resv, 1);
+	if (!ret) {
+		if (arg->flags & VGEM_FENCE_WRITE)
+			dma_resv_add_excl_fence(resv, fence);
+		else
+			dma_resv_add_shared_fence(resv, fence);
+	}
 	dma_resv_unlock(resv);
 
 	/* Record the fence in our idr for later signaling */
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
index 48d3c9955f0d..1820ca6cf673 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -214,6 +214,7 @@ void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs,
 
 int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
 {
+	unsigned int i;
 	int ret;
 
 	if (objs->nents == 1) {
@@ -222,6 +223,14 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
 		ret = drm_gem_lock_reservations(objs->objs, objs->nents,
 						&objs->ticket);
 	}
+	if (ret)
+		return ret;
+
+	for (i = 0; i < objs->nents; ++i) {
+		ret = dma_resv_reserve_fences(objs->objs[i]->resv, 1);
+		if (ret)
+			return ret;
+	}
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 31aecc46624b..fe13aa8b4a64 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -747,16 +747,22 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
 			 struct vmw_fence_obj *fence)
 {
 	struct ttm_device *bdev = bo->bdev;
-
 	struct vmw_private *dev_priv =
 		container_of(bdev, struct vmw_private, bdev);
+	int ret;
 
-	if (fence == NULL) {
+	if (fence == NULL)
 		vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
+	else
+		dma_fence_get(&fence->base);
+
+	ret = dma_resv_reserve_fences(bo->base.resv, 1);
+	if (!ret)
 		dma_resv_add_excl_fence(bo->base.resv, &fence->base);
-		dma_fence_put(&fence->base);
-	} else
-		dma_resv_add_excl_fence(bo->base.resv, &fence->base);
+	else
+		/* Last resort fallback when we are OOM */
+		dma_fence_wait(&fence->base, false);
+	dma_fence_put(&fence->base);
 }
 
 
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index ecb697d4d861..5fa04d0fccad 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -117,7 +117,7 @@ struct dma_resv {
 	 * A new fence is added by calling dma_resv_add_shared_fence(). Since
 	 * this often needs to be done past the point of no return in command
 	 * submission it cannot fail, and therefore sufficient slots need to be
-	 * reserved by calling dma_resv_reserve_shared().
+	 * reserved by calling dma_resv_reserve_fences().
 	 *
 	 * Note that actual semantics of what an exclusive or shared fence mean
 	 * is defined by the user, for reservation objects shared across drivers
@@ -413,7 +413,7 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
 
 void dma_resv_init(struct dma_resv *obj);
 void dma_resv_fini(struct dma_resv *obj);
-int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
+int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences);
 void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
 void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
 			     struct dma_fence *fence);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 13/23] drm/atomic-helper: support more than one write fence in drm_gem_plane_helper_prepare_fb
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (10 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-21 13:58   ` Christian König
                   ` (12 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Use dma_resv_get_singleton() here so that, when there is more than one
write fence, they are combined and returned as a single fence.
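
As a quick illustration of the resulting call pattern (a minimal sketch,
not part of this patch): the returned fence may be a dma_fence_array
combining several writers, or NULL when the object carries no write
fences at all.

	struct dma_fence *fence;
	int ret;

	/* Combine the write fences into a single fence; this may allocate
	 * a dma_fence_array internally and can therefore fail with -ENOMEM.
	 */
	ret = dma_resv_get_singleton(obj->resv, false, &fence);
	if (ret)
		return ret;

	/* drm_atomic_set_fence_for_plane() accepts a NULL fence and
	 * otherwise takes over the reference obtained above.
	 */
	drm_atomic_set_fence_for_plane(state, fence);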

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_gem_atomic_helper.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index c3189afe10cb..9338ddb7edff 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -143,25 +143,21 @@
  */
 int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 {
-	struct dma_resv_iter cursor;
 	struct drm_gem_object *obj;
 	struct dma_fence *fence;
+	int ret;
 
 	if (!state->fb)
 		return 0;
 
 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	dma_resv_iter_begin(&cursor, obj->resv, false);
-	dma_resv_for_each_fence_unlocked(&cursor, fence) {
-		/* TODO: Currently there should be only one write fence, so this
-		 * here works fine. But drm_atomic_set_fence_for_plane() should
-		 * be changed to be able to handle more fences in general for
-		 * multiple BOs per fb anyway. */
-		dma_fence_get(fence);
-		break;
-	}
-	dma_resv_iter_end(&cursor);
+	ret = dma_resv_get_singleton(obj->resv, false, &fence);
+	if (ret)
+		return ret;
 
+	/* TODO: drm_atomic_set_fence_for_plane() should be changed to be able
+	 * to handle more fences in general for multiple BOs per fb.
+	 */
 	drm_atomic_set_fence_for_plane(state, fence);
 	return 0;
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [Nouveau] [PATCH 14/23] drm/nouveau: support more than one write fence in nv50_wndw_prepare_fb
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
@ 2022-03-21 13:58   ` Christian König
  2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
                     ` (23 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel
  Cc: nouveau, Maxime Ripard, Christian König, Laurent Pinchart

Use dma_resv_get_singleton() here so that, when there is more than one
write fence, they are combined and returned as a single fence.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Maxime Ripard <maxime@cerno.tech>
Cc: Lyude Paul <lyude@redhat.com>
Cc: nouveau@lists.freedesktop.org
---
 drivers/gpu/drm/nouveau/dispnv50/wndw.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 0c1a2ea0ed04..e2faf92e4831 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -536,8 +536,6 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	struct nouveau_bo *nvbo;
 	struct nv50_head_atom *asyh;
 	struct nv50_wndw_ctxdma *ctxdma;
-	struct dma_resv_iter cursor;
-	struct dma_fence *fence;
 	int ret;
 
 	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb);
@@ -560,13 +558,11 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 			asyw->image.handle[0] = ctxdma->object.handle;
 	}
 
-	dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false);
-	dma_resv_for_each_fence_unlocked(&cursor, fence) {
-		/* TODO: We only use the first writer here */
-		asyw->state.fence = dma_fence_get(fence);
-		break;
-	}
-	dma_resv_iter_end(&cursor);
+	ret = dma_resv_get_singleton(nvbo->bo.base.resv, false,
+				     &asyw->state.fence);
+	if (ret)
+		return ret;
+
 	asyw->image.offset[0] = nvbo->offset;
 
 	if (wndw->func->prepare) {
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 15/23] drm/amdgpu: use dma_resv_get_singleton in amdgpu_pasid_free_cb
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (12 preceding siblings ...)
  2022-03-21 13:58   ` Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-21 13:58 ` [PATCH 16/23] dma-buf: add enum dma_resv_usage v3 Christian König
                   ` (10 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König, amd-gfx

Makes the code a bit simpler.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: amd-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 23 +++--------------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index be48487e2ca7..888d97143177 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -107,36 +107,19 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
 void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 			       u32 pasid)
 {
-	struct dma_fence *fence, **fences;
 	struct amdgpu_pasid_cb *cb;
-	unsigned count;
+	struct dma_fence *fence;
 	int r;
 
-	r = dma_resv_get_fences(resv, true, &count, &fences);
+	r = dma_resv_get_singleton(resv, true, &fence);
 	if (r)
 		goto fallback;
 
-	if (count == 0) {
+	if (!fence) {
 		amdgpu_pasid_free(pasid);
 		return;
 	}
 
-	if (count == 1) {
-		fence = fences[0];
-		kfree(fences);
-	} else {
-		uint64_t context = dma_fence_context_alloc(1);
-		struct dma_fence_array *array;
-
-		array = dma_fence_array_create(count, fences, context,
-					       1, false);
-		if (!array) {
-			kfree(fences);
-			goto fallback;
-		}
-		fence = &array->base;
-	}
-
 	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
 	if (!cb) {
 		/* Last resort when we are OOM */
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 16/23] dma-buf: add enum dma_resv_usage v3
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (13 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 15/23] drm/amdgpu: use dma_resv_get_singleton in amdgpu_pasid_free_cb Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 15:24   ` Daniel Vetter
  2022-04-04  1:13   ` Bas Nieuwenhuizen
  2022-03-21 13:58 ` [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5 Christian König
                   ` (9 subsequent siblings)
  24 siblings, 2 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

This change adds the dma_resv_usage enum and allows us to specify why a
dma_resv object is queried for its containing fences.

In addition to that, a dma_resv_usage_rw() helper function is added to aid
retrieving the fences for a read or write userspace submission.

This is then deployed to the different query functions of the dma_resv
object and all of their users. Where the write parameter was previously
true we now use DMA_RESV_USAGE_READ, and DMA_RESV_USAGE_WRITE otherwise.
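
For reference, a hedged sketch of the mapping the helper is meant to
implement (the authoritative definition is the one this patch adds to
include/linux/dma-resv.h):

	static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
	{
		/* A new write has to wait for all existing readers and
		 * writers, while a new read only has to wait for the
		 * existing writers.
		 */
		return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE;
	}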

v2: add KERNEL/OTHER in separate patch
v3: some kerneldoc suggestions by Daniel

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c                     |  3 +-
 drivers/dma-buf/dma-resv.c                    | 33 +++++----
 drivers/dma-buf/st-dma-resv.c                 | 48 ++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c       |  5 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c      |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c       |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  7 +-
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  3 +-
 drivers/gpu/drm/drm_gem.c                     |  6 +-
 drivers/gpu/drm/drm_gem_atomic_helper.c       |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem.c         |  6 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  7 +-
 .../gpu/drm/i915/display/intel_atomic_plane.c |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_lmem.c      |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_wait.c      |  6 +-
 .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  3 +-
 drivers/gpu/drm/i915/i915_request.c           |  3 +-
 drivers/gpu/drm/i915/i915_sw_fence.c          |  2 +-
 drivers/gpu/drm/msm/msm_gem.c                 |  3 +-
 drivers/gpu/drm/nouveau/dispnv50/wndw.c       |  3 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c          |  8 +--
 drivers/gpu/drm/nouveau/nouveau_fence.c       |  8 ++-
 drivers/gpu/drm/nouveau/nouveau_gem.c         |  3 +-
 drivers/gpu/drm/panfrost/panfrost_drv.c       |  3 +-
 drivers/gpu/drm/qxl/qxl_debugfs.c             |  3 +-
 drivers/gpu/drm/radeon/radeon_display.c       |  3 +-
 drivers/gpu/drm/radeon/radeon_gem.c           |  9 ++-
 drivers/gpu/drm/radeon/radeon_mn.c            |  4 +-
 drivers/gpu/drm/radeon/radeon_sync.c          |  2 +-
 drivers/gpu/drm/radeon/radeon_uvd.c           |  4 +-
 drivers/gpu/drm/scheduler/sched_main.c        |  3 +-
 drivers/gpu/drm/ttm/ttm_bo.c                  | 18 ++---
 drivers/gpu/drm/vgem/vgem_fence.c             |  4 +-
 drivers/gpu/drm/virtio/virtgpu_ioctl.c        |  5 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |  4 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c      |  4 +-
 drivers/infiniband/core/umem_dmabuf.c         |  3 +-
 include/linux/dma-resv.h                      | 69 +++++++++++++++----
 46 files changed, 208 insertions(+), 127 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 602b12d7470d..528983d3ba64 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1124,7 +1124,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
 	long ret;
 
 	/* Wait on any implicit rendering fences */
-	ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT);
+	ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write),
+				    true, MAX_SCHEDULE_TIMEOUT);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 78a32da2cb0b..bb7b023c2d33 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -384,7 +384,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
 	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
 	cursor->index = -1;
 	cursor->shared_count = 0;
-	if (cursor->all_fences) {
+	if (cursor->usage >= DMA_RESV_USAGE_READ) {
 		cursor->fences = dma_resv_shared_list(cursor->obj);
 		if (cursor->fences)
 			cursor->shared_count = cursor->fences->shared_count;
@@ -496,7 +496,7 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
 	dma_resv_assert_held(cursor->obj);
 
 	cursor->index = 0;
-	if (cursor->all_fences)
+	if (cursor->usage >= DMA_RESV_USAGE_READ)
 		cursor->fences = dma_resv_shared_list(cursor->obj);
 	else
 		cursor->fences = NULL;
@@ -551,7 +551,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
 	list = NULL;
 	excl = NULL;
 
-	dma_resv_iter_begin(&cursor, src, true);
+	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
 	dma_resv_for_each_fence_unlocked(&cursor, f) {
 
 		if (dma_resv_iter_is_restarted(&cursor)) {
@@ -597,7 +597,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
  * dma_resv_get_fences - Get an object's shared and exclusive
  * fences without update side lock held
  * @obj: the reservation object
- * @write: true if we should return all fences
+ * @usage: controls which fences to include, see enum dma_resv_usage.
  * @num_fences: the number of fences returned
  * @fences: the array of fence ptrs returned (array is krealloc'd to the
  * required size, and must be freed by caller)
@@ -605,7 +605,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
  * Retrieve all fences from the reservation object.
  * Returns either zero or -ENOMEM.
  */
-int dma_resv_get_fences(struct dma_resv *obj, bool write,
+int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
 			unsigned int *num_fences, struct dma_fence ***fences)
 {
 	struct dma_resv_iter cursor;
@@ -614,7 +614,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write,
 	*num_fences = 0;
 	*fences = NULL;
 
-	dma_resv_iter_begin(&cursor, obj, write);
+	dma_resv_iter_begin(&cursor, obj, usage);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 
 		if (dma_resv_iter_is_restarted(&cursor)) {
@@ -646,7 +646,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences);
 /**
  * dma_resv_get_singleton - Get a single fence for all the fences
  * @obj: the reservation object
- * @write: true if we should return all fences
+ * @usage: controls which fences to include, see enum dma_resv_usage.
  * @fence: the resulting fence
  *
  * Get a single fence representing all the fences inside the resv object.
@@ -656,7 +656,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences);
  * object since that can lead to stack corruption when finalizing the
  * dma_fence_array.
  */
-int dma_resv_get_singleton(struct dma_resv *obj, bool write,
+int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
 			   struct dma_fence **fence)
 {
 	struct dma_fence_array *array;
@@ -664,7 +664,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, bool write,
 	unsigned count;
 	int r;
 
-	r = dma_resv_get_fences(obj, write, &count, &fences);
+	r = dma_resv_get_fences(obj, usage, &count, &fences);
         if (r)
 		return r;
 
@@ -698,7 +698,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
  * dma_resv_wait_timeout - Wait on reservation's objects
  * shared and/or exclusive fences.
  * @obj: the reservation object
- * @wait_all: if true, wait on all fences, else wait on just exclusive fence
+ * @usage: controls which fences to include, see enum dma_resv_usage.
  * @intr: if true, do interruptible wait
  * @timeout: timeout value in jiffies or zero to return immediately
  *
@@ -708,14 +708,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
  * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or
  * greater than zer on success.
  */
-long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
-			   unsigned long timeout)
+long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
+			   bool intr, unsigned long timeout)
 {
 	long ret = timeout ? timeout : 1;
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_iter_begin(&cursor, obj, wait_all);
+	dma_resv_iter_begin(&cursor, obj, usage);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 
 		ret = dma_fence_wait_timeout(fence, intr, ret);
@@ -735,8 +735,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
  * dma_resv_test_signaled - Test if a reservation object's fences have been
  * signaled.
  * @obj: the reservation object
- * @test_all: if true, test all fences, otherwise only test the exclusive
- * fence
+ * @usage: controls which fences to include, see enum dma_resv_usage.
  *
  * Callers are not required to hold specific locks, but maybe hold
  * dma_resv_lock() already.
@@ -745,12 +744,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
  *
  * True if all fences signaled, else false.
  */
-bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
+bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage)
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_iter_begin(&cursor, obj, test_all);
+	dma_resv_iter_begin(&cursor, obj, usage);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		dma_resv_iter_end(&cursor);
 		return false;
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index d2e61f6ae989..d097981061b1 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -58,7 +58,7 @@ static int sanitycheck(void *arg)
 	return r;
 }
 
-static int test_signaling(void *arg, bool shared)
+static int test_signaling(void *arg, enum dma_resv_usage usage)
 {
 	struct dma_resv resv;
 	struct dma_fence *f;
@@ -81,18 +81,18 @@ static int test_signaling(void *arg, bool shared)
 		goto err_unlock;
 	}
 
-	if (shared)
+	if (usage >= DMA_RESV_USAGE_READ)
 		dma_resv_add_shared_fence(&resv, f);
 	else
 		dma_resv_add_excl_fence(&resv, f);
 
-	if (dma_resv_test_signaled(&resv, shared)) {
+	if (dma_resv_test_signaled(&resv, usage)) {
 		pr_err("Resv unexpectedly signaled\n");
 		r = -EINVAL;
 		goto err_unlock;
 	}
 	dma_fence_signal(f);
-	if (!dma_resv_test_signaled(&resv, shared)) {
+	if (!dma_resv_test_signaled(&resv, usage)) {
 		pr_err("Resv not reporting signaled\n");
 		r = -EINVAL;
 		goto err_unlock;
@@ -107,15 +107,15 @@ static int test_signaling(void *arg, bool shared)
 
 static int test_excl_signaling(void *arg)
 {
-	return test_signaling(arg, false);
+	return test_signaling(arg, DMA_RESV_USAGE_WRITE);
 }
 
 static int test_shared_signaling(void *arg)
 {
-	return test_signaling(arg, true);
+	return test_signaling(arg, DMA_RESV_USAGE_READ);
 }
 
-static int test_for_each(void *arg, bool shared)
+static int test_for_each(void *arg, enum dma_resv_usage usage)
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *f, *fence;
@@ -139,13 +139,13 @@ static int test_for_each(void *arg, bool shared)
 		goto err_unlock;
 	}
 
-	if (shared)
+	if (usage >= DMA_RESV_USAGE_READ)
 		dma_resv_add_shared_fence(&resv, f);
 	else
 		dma_resv_add_excl_fence(&resv, f);
 
 	r = -ENOENT;
-	dma_resv_for_each_fence(&cursor, &resv, shared, fence) {
+	dma_resv_for_each_fence(&cursor, &resv, usage, fence) {
 		if (!r) {
 			pr_err("More than one fence found\n");
 			r = -EINVAL;
@@ -156,7 +156,8 @@ static int test_for_each(void *arg, bool shared)
 			r = -EINVAL;
 			goto err_unlock;
 		}
-		if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
+		if (dma_resv_iter_is_exclusive(&cursor) !=
+		    (usage >= DMA_RESV_USAGE_READ)) {
 			pr_err("Unexpected fence usage\n");
 			r = -EINVAL;
 			goto err_unlock;
@@ -178,15 +179,15 @@ static int test_for_each(void *arg, bool shared)
 
 static int test_excl_for_each(void *arg)
 {
-	return test_for_each(arg, false);
+	return test_for_each(arg, DMA_RESV_USAGE_WRITE);
 }
 
 static int test_shared_for_each(void *arg)
 {
-	return test_for_each(arg, true);
+	return test_for_each(arg, DMA_RESV_USAGE_READ);
 }
 
-static int test_for_each_unlocked(void *arg, bool shared)
+static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *f, *fence;
@@ -211,14 +212,14 @@ static int test_for_each_unlocked(void *arg, bool shared)
 		goto err_free;
 	}
 
-	if (shared)
+	if (usage >= DMA_RESV_USAGE_READ)
 		dma_resv_add_shared_fence(&resv, f);
 	else
 		dma_resv_add_excl_fence(&resv, f);
 	dma_resv_unlock(&resv);
 
 	r = -ENOENT;
-	dma_resv_iter_begin(&cursor, &resv, shared);
+	dma_resv_iter_begin(&cursor, &resv, usage);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!r) {
 			pr_err("More than one fence found\n");
@@ -234,7 +235,8 @@ static int test_for_each_unlocked(void *arg, bool shared)
 			r = -EINVAL;
 			goto err_iter_end;
 		}
-		if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
+		if (dma_resv_iter_is_exclusive(&cursor) !=
+		    (usage >= DMA_RESV_USAGE_READ)) {
 			pr_err("Unexpected fence usage\n");
 			r = -EINVAL;
 			goto err_iter_end;
@@ -262,15 +264,15 @@ static int test_for_each_unlocked(void *arg, bool shared)
 
 static int test_excl_for_each_unlocked(void *arg)
 {
-	return test_for_each_unlocked(arg, false);
+	return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
 }
 
 static int test_shared_for_each_unlocked(void *arg)
 {
-	return test_for_each_unlocked(arg, true);
+	return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
 }
 
-static int test_get_fences(void *arg, bool shared)
+static int test_get_fences(void *arg, enum dma_resv_usage usage)
 {
 	struct dma_fence *f, **fences = NULL;
 	struct dma_resv resv;
@@ -294,13 +296,13 @@ static int test_get_fences(void *arg, bool shared)
 		goto err_resv;
 	}
 
-	if (shared)
+	if (usage >= DMA_RESV_USAGE_READ)
 		dma_resv_add_shared_fence(&resv, f);
 	else
 		dma_resv_add_excl_fence(&resv, f);
 	dma_resv_unlock(&resv);
 
-	r = dma_resv_get_fences(&resv, shared, &i, &fences);
+	r = dma_resv_get_fences(&resv, usage, &i, &fences);
 	if (r) {
 		pr_err("get_fences failed\n");
 		goto err_free;
@@ -324,12 +326,12 @@ static int test_get_fences(void *arg, bool shared)
 
 static int test_excl_get_fences(void *arg)
 {
-	return test_get_fences(arg, false);
+	return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
 }
 
 static int test_shared_get_fences(void *arg)
 {
-	return test_get_fences(arg, true);
+	return test_get_fences(arg, DMA_RESV_USAGE_READ);
 }
 
 int dma_resv(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 02488a824fe8..2bf909a4242a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1278,7 +1278,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		 * submission in a dma_fence_chain and add it as exclusive
 		 * fence.
 		 */
-		dma_resv_for_each_fence(&cursor, resv, false, fence) {
+		dma_resv_for_each_fence(&cursor, resv,
+					DMA_RESV_USAGE_WRITE,
+					fence) {
 			break;
 		}
 		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index e76b96d55551..cefa404d7842 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 		goto unpin;
 	}
 
-	/* TODO: Unify this with other drivers */
-	r = dma_resv_get_fences(new_abo->tbo.base.resv, true,
+	r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
 				&work->shared_count,
 				&work->shared);
 	if (unlikely(r != 0)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9b12cab5e606..3a5fe05c7a7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 	robj = gem_to_amdgpu_bo(gobj);
-	ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout);
+	ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+				    true, timeout);
 
 	/* ret == 0 means not signaled,
 	 * ret > 0 means signaled
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 888d97143177..490d2a7a3e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 	struct dma_fence *fence;
 	int r;
 
-	r = dma_resv_get_singleton(resv, true, &fence);
+	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence);
 	if (r)
 		goto fallback;
 
@@ -139,7 +139,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 	/* Not enough memory for the delayed delete, as last resort
 	 * block for all the fences to complete.
 	 */
-	dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT);
+	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
+			      false, MAX_SCHEDULE_TIMEOUT);
 	amdgpu_pasid_free(pasid);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 4b153daf283d..86f5248676b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
 
 	mmu_interval_set_seq(mni, cur_seq);
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
-				  MAX_SCHEDULE_TIMEOUT);
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
+				  false, MAX_SCHEDULE_TIMEOUT);
 	mutex_unlock(&adev->notifier_lock);
 	if (r <= 0)
 		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 2f808decd8d9..0a843cc54945 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -765,8 +765,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 		return 0;
 	}
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
-				  MAX_SCHEDULE_TIMEOUT);
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+				  false, MAX_SCHEDULE_TIMEOUT);
 	if (r < 0)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 40e06745fae9..744e144e5fc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 	if (resv == NULL)
 		return -EINVAL;
 
-	dma_resv_for_each_fence(&cursor, resv, true, f) {
+	/* TODO: Use DMA_RESV_USAGE_READ here */
+	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
 		dma_fence_chain_for_each(f, f) {
 			struct dma_fence *tmp = dma_fence_chain_contained(f);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5859ed0552a4..9ffd8c4c34a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1356,7 +1356,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	 * If true, then return false as any KFD process needs all its BOs to
 	 * be resident to run successfully
 	 */
-	dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
+	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
+				DMA_RESV_USAGE_READ, f) {
 		if (amdkfd_fence_check_mm(f, current->mm))
 			return false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 6f8de11a17f1..33deb0df62fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1162,7 +1162,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ib->length_dw = 16;
 
 	if (direct) {
-		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
+		r = dma_resv_wait_timeout(bo->tbo.base.resv,
+					  DMA_RESV_USAGE_WRITE, false,
 					  msecs_to_jiffies(10));
 		if (r == 0)
 			r = -ETIMEDOUT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0ab85280e8ed..f3235aad7282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2105,7 +2105,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_for_each_fence(&cursor, resv, true, fence) {
+	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
 		/* Add a callback for each fence in the reservation object */
 		amdgpu_vm_prt_get(adev);
 		amdgpu_vm_add_prt_cb(adev, fence);
@@ -2707,7 +2707,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return true;
 
 	/* Don't evict VM page tables while they are busy */
-	if (!dma_resv_test_signaled(bo->tbo.base.resv, true))
+	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ))
 		return false;
 
 	/* Try to block ongoing updates */
@@ -2887,7 +2887,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
  */
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 {
-	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true,
+	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
+					DMA_RESV_USAGE_READ,
 					true, timeout);
 	if (timeout <= 0)
 		return timeout;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0eeb394e949c..c9532642559c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9199,7 +9199,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		 * deadlock during GPU reset when this fence will not signal
 		 * but we hold reservation lock for the BO.
 		 */
-		r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false,
+		r = dma_resv_wait_timeout(abo->tbo.base.resv,
+					  DMA_RESV_USAGE_WRITE, false,
 					  msecs_to_jiffies(5000));
 		if (unlikely(r <= 0))
 			DRM_ERROR("Waiting for fences timed out!");
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 3c888db59ea4..54079d762051 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -771,7 +771,8 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle,
 		return -EINVAL;
 	}
 
-	ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout);
+	ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all),
+				    true, timeout);
 	if (ret == 0)
 		ret = -ETIME;
 	else if (ret > 0)
@@ -1345,7 +1346,8 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
 	struct dma_fence *fence;
 	int ret = 0;
 
-	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
+	dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
+				fence) {
 		ret = drm_gem_fence_array_add(fence_array, fence);
 		if (ret)
 			break;
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index 9338ddb7edff..a6d89aed0bda 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -151,7 +151,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
 		return 0;
 
 	obj = drm_gem_fb_get_obj(state->fb, 0);
-	ret = dma_resv_get_singleton(obj->resv, false, &fence);
+	ret = dma_resv_get_singleton(obj->resv, DMA_RESV_USAGE_WRITE, &fence);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index d5314aa28ff7..507172e2780b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -380,12 +380,14 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
 	}
 
 	if (op & ETNA_PREP_NOSYNC) {
-		if (!dma_resv_test_signaled(obj->resv, write))
+		if (!dma_resv_test_signaled(obj->resv,
+					    dma_resv_usage_rw(write)))
 			return -EBUSY;
 	} else {
 		unsigned long remain = etnaviv_timeout_to_jiffies(timeout);
 
-		ret = dma_resv_wait_timeout(obj->resv, write, true, remain);
+		ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
+					    true, remain);
 		if (ret <= 0)
 			return ret == 0 ? -ETIMEDOUT : ret;
 	}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index b808ddb9da48..d7cd26dfaf8a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -178,6 +178,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
 		struct dma_resv *robj = bo->obj->base.resv;
+		enum dma_resv_usage usage;
 
 		ret = dma_resv_reserve_fences(robj, 1);
 		if (ret)
@@ -186,9 +187,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
 			continue;
 
-		ret = dma_resv_get_fences(robj,
-					  bo->flags & ETNA_SUBMIT_BO_WRITE,
-					  &bo->nr_shared, &bo->shared);
+		usage = dma_resv_usage_rw(bo->flags & ETNA_SUBMIT_BO_WRITE);
+		ret = dma_resv_get_fences(robj, usage, &bo->nr_shared,
+					  &bo->shared);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index c2c512cd8ec0..2c3bb8aecd07 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -799,7 +799,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
 		if (ret < 0)
 			goto unpin_fb;
 
-		dma_resv_iter_begin(&cursor, obj->base.resv, false);
+		dma_resv_iter_begin(&cursor, obj->base.resv,
+				    DMA_RESV_USAGE_WRITE);
 		dma_resv_for_each_fence_unlocked(&cursor, fence) {
 			add_rps_boost_after_vblank(new_plane_state->hw.crtc,
 						   fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 470fdfd61a0f..14a1c0ad8c3c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -138,12 +138,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * Alternatively, we can trade that extra information on read/write
 	 * activity with
 	 *	args->busy =
-	 *		!dma_resv_test_signaled(obj->resv, true);
+	 *		!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
 	 * to report the overall busyness. This is what the wait-ioctl does.
 	 *
 	 */
 	args->busy = 0;
-	dma_resv_iter_begin(&cursor, obj->base.resv, true);
+	dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (dma_resv_iter_is_restarted(&cursor))
 			args->busy = 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
index 444f8268b9c5..a200d3e66573 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
 	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
 
 #ifdef CONFIG_LOCKDEP
-	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) &&
+	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) &&
 		    i915_gem_object_evictable(obj));
 #endif
 	return mr && (mr->type == INTEL_MEMORY_LOCAL ||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 3cc01c30dd62..60feff9160de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -85,7 +85,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
 		return true;
 
 	/* we will unbind on next submission, still have userptr pins */
-	r = dma_resv_wait_timeout(obj->base.resv, true, false,
+	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false,
 				  MAX_SCHEDULE_TIMEOUT);
 	if (r <= 0)
 		drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index dab3d30c09a0..319936f91ac5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -40,7 +40,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
 	struct dma_fence *fence;
 	long ret = timeout ?: 1;
 
-	dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
+	dma_resv_iter_begin(&cursor, resv,
+			    dma_resv_usage_rw(flags & I915_WAIT_ALL));
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		ret = i915_gem_object_wait_fence(fence, flags, timeout);
 		if (ret <= 0)
@@ -117,7 +118,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL);
+	dma_resv_iter_begin(&cursor, obj->base.resv,
+			    dma_resv_usage_rw(flags & I915_WAIT_ALL));
 	dma_resv_for_each_fence_unlocked(&cursor, fence)
 		i915_gem_fence_wait_priority(fence, attr);
 	dma_resv_iter_end(&cursor);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 3cc74b0fed06..342df658e0fc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -219,7 +219,8 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
 		goto out_detach;
 	}
 
-	timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ);
+	timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE,
+					true, 5 * HZ);
 	if (!timeout) {
 		pr_err("dmabuf wait for exclusive fence timed out.\n");
 		timeout = -ETIME;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 76cf5ac91e94..17d7216ce221 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1594,7 +1594,8 @@ i915_request_await_object(struct i915_request *to,
 	struct dma_fence *fence;
 	int ret = 0;
 
-	dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
+	dma_resv_for_each_fence(&cursor, obj->base.resv,
+				dma_resv_usage_rw(write), fence) {
 		ret = i915_request_await_dma_fence(to, fence);
 		if (ret)
 			break;
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 2a74a9a1cafe..ae984c66c48a 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -585,7 +585,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 	debug_fence_assert(fence);
 	might_sleep_if(gfpflags_allow_blocking(gfp));
 
-	dma_resv_iter_begin(&cursor, resv, write);
+	dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write));
 	dma_resv_for_each_fence_unlocked(&cursor, f) {
 		pending = i915_sw_fence_await_dma_fence(fence, f, timeout,
 							gfp);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 02b9ae65a96a..01bbb5f2d462 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -848,7 +848,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout)
 		op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
 	long ret;
 
-	ret = dma_resv_wait_timeout(obj->resv, write, true,  remain);
+	ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
+				    true,  remain);
 	if (ret == 0)
 		return remain == 0 ? -EBUSY : -ETIMEDOUT;
 	else if (ret < 0)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index e2faf92e4831..8642b84ea20c 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -558,7 +558,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 			asyw->image.handle[0] = ctxdma->object.handle;
 	}
 
-	ret = dma_resv_get_singleton(nvbo->bo.base.resv, false,
+	ret = dma_resv_get_singleton(nvbo->bo.base.resv,
+				     DMA_RESV_USAGE_WRITE,
 				     &asyw->state.fence);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 74f8652d2bd3..c6bb4dbcd735 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -962,11 +962,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 	struct dma_fence *fence;
 	int ret;
 
-	/* TODO: This is actually a memory management dependency */
-	ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
+	ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE,
+				     &fence);
 	if (ret)
-		dma_resv_wait_timeout(bo->base.resv, false, false,
-				      MAX_SCHEDULE_TIMEOUT);
+		dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE,
+				      false, MAX_SCHEDULE_TIMEOUT);
 
 	nv10_bo_put_tile_region(dev, *old_tile, fence);
 	*old_tile = new_tile;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 0268259e97eb..d5e81ccee01c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -350,14 +350,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
 	if (ret)
 		return ret;
 
-	/* Waiting for the exclusive fence first causes performance regressions
-	 * under some circumstances. So manually wait for the shared ones first.
+	/* Waiting for the writes first causes performance regressions
+	 * under some circumstances. So manually wait for the reads first.
 	 */
 	for (i = 0; i < 2; ++i) {
 		struct dma_resv_iter cursor;
 		struct dma_fence *fence;
 
-		dma_resv_for_each_fence(&cursor, resv, exclusive, fence) {
+		dma_resv_for_each_fence(&cursor, resv,
+					dma_resv_usage_rw(exclusive),
+					fence) {
 			struct nouveau_fence *f;
 
 			if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 9416bee92141..fab542a758ff 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -962,7 +962,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 		return -ENOENT;
 	nvbo = nouveau_gem_object(gem);
 
-	lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true,
+	lret = dma_resv_wait_timeout(nvbo->bo.base.resv,
+				     dma_resv_usage_rw(write), true,
 				     no_wait ? 0 : 30 * HZ);
 	if (!lret)
 		ret = -EBUSY;
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 94b6f0a19c83..7fcbc2a5b6cd 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -316,7 +316,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
 	if (!gem_obj)
 		return -ENOENT;
 
-	ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout);
+	ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ,
+				    true, timeout);
 	if (!ret)
 		ret = timeout ? -ETIMEDOUT : -EBUSY;
 
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
index 6a36b0fd845c..33e5889d6608 100644
--- a/drivers/gpu/drm/qxl/qxl_debugfs.c
+++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
@@ -61,7 +61,8 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
 		struct dma_fence *fence;
 		int rel = 0;
 
-		dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true);
+		dma_resv_iter_begin(&cursor, bo->tbo.base.resv,
+				    DMA_RESV_USAGE_READ);
 		dma_resv_for_each_fence_unlocked(&cursor, fence) {
 			if (dma_resv_iter_is_restarted(&cursor))
 				rel = 0;
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index f60e826cd292..57ff2b723c87 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -533,7 +533,8 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
 		goto cleanup;
 	}
-	r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence);
+	r = dma_resv_get_singleton(new_rbo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+				   &work->fence);
 	if (r) {
 		radeon_bo_unreserve(new_rbo);
 		DRM_ERROR("failed to get new rbo buffer fences\n");
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index a36a4f2c76b0..71bf9299e45c 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -161,7 +161,9 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
 	}
 	if (domain == RADEON_GEM_DOMAIN_CPU) {
 		/* Asking for cpu access wait for object idle */
-		r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ);
+		r = dma_resv_wait_timeout(robj->tbo.base.resv,
+					  DMA_RESV_USAGE_READ,
+					  true, 30 * HZ);
 		if (!r)
 			r = -EBUSY;
 
@@ -523,7 +525,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 	}
 	robj = gem_to_radeon_bo(gobj);
 
-	r = dma_resv_test_signaled(robj->tbo.base.resv, true);
+	r = dma_resv_test_signaled(robj->tbo.base.resv, DMA_RESV_USAGE_READ);
 	if (r == 0)
 		r = -EBUSY;
 	else
@@ -552,7 +554,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	}
 	robj = gem_to_radeon_bo(gobj);
 
-	ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ);
+	ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+				    true, 30 * HZ);
 	if (ret == 0)
 		r = -EBUSY;
 	else if (ret < 0)
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index 9fa88549c89e..68ebeb1bdfff 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
 		return true;
 	}
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
-				  MAX_SCHEDULE_TIMEOUT);
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
+				  false, MAX_SCHEDULE_TIMEOUT);
 	if (r <= 0)
 		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
 
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
index b991ba1bcd51..49bbb2266c0f 100644
--- a/drivers/gpu/drm/radeon/radeon_sync.c
+++ b/drivers/gpu/drm/radeon/radeon_sync.c
@@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
 	struct dma_fence *f;
 	int r = 0;
 
-	dma_resv_for_each_fence(&cursor, resv, shared, f) {
+	dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) {
 		fence = to_radeon_fence(f);
 		if (fence && fence->rdev == rdev)
 			radeon_sync_fence(sync, fence);
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 377f9cdb5b53..4000ad2f39ba 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -478,8 +478,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 		return -EINVAL;
 	}
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
-				  MAX_SCHEDULE_TIMEOUT);
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+				  false, MAX_SCHEDULE_TIMEOUT);
 	if (r <= 0) {
 		DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
 		return r ? r : -ETIME;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index b81fceb0b8a2..0a1377dac58d 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -703,7 +703,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
 	struct dma_fence *fence;
 	int ret;
 
-	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
+	dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
+				fence) {
 		/* Make sure to grab an additional ref on the added fence */
 		dma_fence_get(fence);
 		ret = drm_sched_job_add_dependency(job, fence);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 1dd6f13bb03c..d4b2695606e2 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -272,7 +272,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_iter_begin(&cursor, resv, true);
+	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!fence->ops->signaled)
 			dma_fence_enable_sw_signaling(fence);
@@ -301,7 +301,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 	struct dma_resv *resv = &bo->base._resv;
 	int ret;
 
-	if (dma_resv_test_signaled(resv, true))
+	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ))
 		ret = 0;
 	else
 		ret = -EBUSY;
@@ -313,7 +313,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			dma_resv_unlock(bo->base.resv);
 		spin_unlock(&bo->bdev->lru_lock);
 
-		lret = dma_resv_wait_timeout(resv, true, interruptible,
+		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
+					     interruptible,
 					     30 * HZ);
 
 		if (lret < 0)
@@ -416,7 +417,8 @@ static void ttm_bo_release(struct kref *kref)
 			/* Last resort, if we fail to allocate memory for the
 			 * fences block for the BO to become idle
 			 */
-			dma_resv_wait_timeout(bo->base.resv, true, false,
+			dma_resv_wait_timeout(bo->base.resv,
+					      DMA_RESV_USAGE_READ, false,
 					      30 * HZ);
 		}
 
@@ -427,7 +429,7 @@ static void ttm_bo_release(struct kref *kref)
 		ttm_mem_io_free(bdev, bo->resource);
 	}
 
-	if (!dma_resv_test_signaled(bo->base.resv, true) ||
+	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) ||
 	    !dma_resv_trylock(bo->base.resv)) {
 		/* The BO is not idle, resurrect it for delayed destroy */
 		ttm_bo_flush_all_fences(bo);
@@ -1072,14 +1074,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 	long timeout = 15 * HZ;
 
 	if (no_wait) {
-		if (dma_resv_test_signaled(bo->base.resv, true))
+		if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ))
 			return 0;
 		else
 			return -EBUSY;
 	}
 
-	timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible,
-					timeout);
+	timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
+					interruptible, timeout);
 	if (timeout < 0)
 		return timeout;
 
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
index 2ddbebca87d9..91fc4940c65a 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -130,6 +130,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
 	struct vgem_file *vfile = file->driver_priv;
 	struct dma_resv *resv;
 	struct drm_gem_object *obj;
+	enum dma_resv_usage usage;
 	struct dma_fence *fence;
 	int ret;
 
@@ -151,7 +152,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
 
 	/* Check for a conflicting fence */
 	resv = obj->resv;
-	if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) {
+	usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE);
+	if (!dma_resv_test_signaled(resv, usage)) {
 		ret = -EBUSY;
 		goto err_fence;
 	}
diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
index c708bab555c6..5577cc7408b2 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
@@ -518,9 +518,10 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	if (args->flags & VIRTGPU_WAIT_NOWAIT) {
-		ret = dma_resv_test_signaled(obj->resv, true);
+		ret = dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
 	} else {
-		ret = dma_resv_wait_timeout(obj->resv, true, true, timeout);
+		ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ,
+					    true, timeout);
 	}
 	if (ret == 0)
 		ret = -EBUSY;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index fe13aa8b4a64..b96884f7d03d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -528,8 +528,8 @@ static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo,
 	if (flags & drm_vmw_synccpu_allow_cs) {
 		long lret;
 
-		lret = dma_resv_wait_timeout(bo->base.resv, true, true,
-					     nonblock ? 0 :
+		lret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
+					     true, nonblock ? 0 :
 					     MAX_SCHEDULE_TIMEOUT);
 		if (!lret)
 			return -EBUSY;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 36c3b5db7e69..39081dbf9ac8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1166,8 +1166,8 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
 		if (bo->moving)
 			dma_fence_put(bo->moving);
 
-		/* TODO: This is actually a memory management dependency */
-		return dma_resv_get_singleton(bo->base.resv, false,
+		return dma_resv_get_singleton(bo->base.resv,
+					      DMA_RESV_USAGE_WRITE,
 					      &bo->moving);
 	}
 
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index d32cd7538835..f9901d273b8e 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -67,7 +67,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
 	 * may be not up-to-date. Wait for the exporter to finish
 	 * the migration.
 	 */
-	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false,
+	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
+				     DMA_RESV_USAGE_WRITE,
 				     false, MAX_SCHEDULE_TIMEOUT);
 }
 EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 5fa04d0fccad..658674c4b7b9 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -49,6 +49,49 @@ extern struct ww_class reservation_ww_class;
 
 struct dma_resv_list;
 
+/**
+ * enum dma_resv_usage - how the fences from a dma_resv obj are used
+ *
+ * This enum describes the different use cases for a dma_resv object and
+ * controls which fences are returned when queried.
+ */
+enum dma_resv_usage {
+	/**
+	 * @DMA_RESV_USAGE_WRITE: Implicit write synchronization.
+	 *
+	 * This should only be used for userspace command submissions which add
+	 * an implicit write dependency.
+	 */
+	DMA_RESV_USAGE_WRITE,
+
+	/**
+	 * @DMA_RESV_USAGE_READ: Implicit read synchronization.
+	 *
+	 * This should only be used for userspace command submissions which add
+	 * an implicit read dependency.
+	 */
+	DMA_RESV_USAGE_READ,
+};
+
+/**
+ * dma_resv_usage_rw - helper for implicit sync
+ * @write: true if we create a new implicit sync write
+ *
+ * This returns the implicit synchronization usage for write or read accesses,
+ * see enum dma_resv_usage.
+ */
+static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
+{
+	/* This looks confusing at first sight, but is indeed correct.
+	 *
+	 * The rationale is that new write operations need to wait for the
+	 * existing read and write operations to finish.
+	 * But a new read operation only needs to wait for the existing write
+	 * operations to finish.
+	 */
+	return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE;
+}
+
 /**
  * struct dma_resv - a reservation object manages fences for a buffer
  *
@@ -142,8 +185,8 @@ struct dma_resv_iter {
 	/** @obj: The dma_resv object we iterate over */
 	struct dma_resv *obj;
 
-	/** @all_fences: If all fences should be returned */
-	bool all_fences;
+	/** @usage: Controls which fences are returned */
+	enum dma_resv_usage usage;
 
 	/** @fence: the currently handled fence */
 	struct dma_fence *fence;
@@ -173,14 +216,14 @@ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
  * dma_resv_iter_begin - initialize a dma_resv_iter object
  * @cursor: The dma_resv_iter object to initialize
  * @obj: The dma_resv object which we want to iterate over
- * @all_fences: If all fences should be returned or just the exclusive one
+ * @usage: controls which fences to include, see enum dma_resv_usage.
  */
 static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
 				       struct dma_resv *obj,
-				       bool all_fences)
+				       enum dma_resv_usage usage)
 {
 	cursor->obj = obj;
-	cursor->all_fences = all_fences;
+	cursor->usage = usage;
 	cursor->fence = NULL;
 }
 
@@ -241,7 +284,7 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
  * dma_resv_for_each_fence - fence iterator
  * @cursor: a struct dma_resv_iter pointer
  * @obj: a dma_resv object pointer
- * @all_fences: true if all fences should be returned
+ * @usage: controls which fences to return
  * @fence: the current fence
  *
  * Iterate over the fences in a struct dma_resv object while holding the
@@ -250,8 +293,8 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
  * valid as long as the lock is held and so no extra reference to the fence is
  * taken.
  */
-#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
-	for (dma_resv_iter_begin(cursor, obj, all_fences),	\
+#define dma_resv_for_each_fence(cursor, obj, usage, fence)	\
+	for (dma_resv_iter_begin(cursor, obj, usage),	\
 	     fence = dma_resv_iter_first(cursor); fence;	\
 	     fence = dma_resv_iter_next(cursor))
 
@@ -418,14 +461,14 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
 void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
 			     struct dma_fence *fence);
 void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
-int dma_resv_get_fences(struct dma_resv *obj, bool write,
+int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
 			unsigned int *num_fences, struct dma_fence ***fences);
-int dma_resv_get_singleton(struct dma_resv *obj, bool write,
+int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
 			   struct dma_fence **fence);
 int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
-long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
-			   unsigned long timeout);
-bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all);
+long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
+			   bool intr, unsigned long timeout);
+bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage);
 void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
 
 #endif /* _LINUX_RESERVATION_H */
-- 
2.25.1
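
A short illustration of the dma_resv_usage_rw() mapping added above (a
sketch only, not part of the patch; "resv" stands for the buffer's
dma_resv object and the returned remaining timeout is ignored for
brevity): a new write has to wait for all existing reads and writes,
while a new read only has to wait for existing writes, so the helper
returns the broader DMA_RESV_USAGE_READ query for writers and the
narrower DMA_RESV_USAGE_WRITE query for readers.

	/* new writer: depend on existing reads and writes */
	dma_resv_wait_timeout(resv, dma_resv_usage_rw(true),
			      false, MAX_SCHEDULE_TIMEOUT);

	/* new reader: depend on existing writes only */
	dma_resv_wait_timeout(resv, dma_resv_usage_rw(false),
			      false, MAX_SCHEDULE_TIMEOUT);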


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (14 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 16/23] dma-buf: add enum dma_resv_usage v3 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 15:43   ` Daniel Vetter
  2022-04-02 22:16   ` Bas Nieuwenhuizen
  2022-03-21 13:58 ` [PATCH 18/23] drm/amdgpu: remove dma_resv workaround Christian König
                   ` (8 subsequent siblings)
  24 siblings, 2 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Instead of distinguishing between shared and exclusive fences, specify
the fence usage while adding fences.

Rework all drivers to use this interface instead and deprecate the old one.

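As an illustration of the conversion pattern (a minimal sketch only, not
lifted from any particular driver; "bo", "fence" and "write" are
placeholders), a typical call site goes from picking one of the two old
functions to passing the usage explicitly:

	/* old interface: usage implied by which function is called */
	if (write)
		dma_resv_add_excl_fence(bo->base.resv, fence);
	else
		dma_resv_add_shared_fence(bo->base.resv, fence);

	/* new interface: one entry point, usage passed explicitly */
	dma_resv_add_fence(bo->base.resv, fence,
			   write ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);

As before, the caller must hold the reservation lock and have reserved a
slot with dma_resv_reserve_fences() beforehand.
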
v2: some kerneldoc comments suggested by Daniel
v3: fix a missing case in radeon
v4: rebase on nouveau changes, fix lockdep and temporary disable warning
v5: more documentation updates

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c                    | 345 ++++++++----------
 drivers/dma-buf/st-dma-resv.c                 | 101 ++---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   6 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  10 +-
 drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  13 +-
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   3 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |   5 +-
 drivers/gpu/drm/i915/i915_vma.c               |   6 +-
 drivers/gpu/drm/lima/lima_gem.c               |   2 +-
 drivers/gpu/drm/msm/msm_gem_submit.c          |   2 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c          |   9 +-
 drivers/gpu/drm/nouveau/nouveau_fence.c       |   4 +-
 drivers/gpu/drm/qxl/qxl_release.c             |   3 +-
 drivers/gpu/drm/radeon/radeon_object.c        |   6 +-
 drivers/gpu/drm/ttm/ttm_bo.c                  |   2 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c             |   5 +-
 drivers/gpu/drm/ttm/ttm_execbuf_util.c        |   6 +-
 drivers/gpu/drm/v3d/v3d_gem.c                 |   4 +-
 drivers/gpu/drm/vc4/vc4_gem.c                 |   2 +-
 drivers/gpu/drm/vgem/vgem_fence.c             |   9 +-
 drivers/gpu/drm/virtio/virtgpu_gem.c          |   3 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |   3 +-
 include/linux/dma-buf.h                       |  17 +-
 include/linux/dma-resv.h                      |  72 ++--
 26 files changed, 276 insertions(+), 370 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index bb7b023c2d33..26257ba1527e 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -44,12 +44,12 @@
 /**
  * DOC: Reservation Object Overview
  *
- * The reservation object provides a mechanism to manage shared and
- * exclusive fences associated with a buffer.  A reservation object
- * can have attached one exclusive fence (normally associated with
- * write operations) or N shared fences (read operations).  The RCU
- * mechanism is used to protect read access to fences from locked
- * write-side updates.
+ * The reservation object provides a mechanism to manage a container of
+ * dma_fence objects associated with a resource. A reservation object
+ * can have any number of fences attached to it. Each fence carries a usage
+ * parameter determining how the operation represented by the fence is using the
+ * resource. The RCU mechanism is used to protect read access to fences from
+ * locked write-side updates.
  *
  * See struct dma_resv for more details.
  */
@@ -57,29 +57,74 @@
 DEFINE_WD_CLASS(reservation_ww_class);
 EXPORT_SYMBOL(reservation_ww_class);
 
+/* Mask for the lower fence pointer bits */
+#define DMA_RESV_LIST_MASK	0x3
+
 struct dma_resv_list {
 	struct rcu_head rcu;
-	u32 shared_count, shared_max;
-	struct dma_fence __rcu *shared[];
+	u32 num_fences, max_fences;
+	struct dma_fence __rcu *table[];
 };
 
+/**
+ * dma_resv_list_entry - extract fence and usage from a list entry
+ * @list: the list to extract an entry from
+ * @index: which entry we want
+ * @resv: optional dma_resv obj for lockdep check that the access is allowed
+ * @fence: the resulting fence
+ * @usage: the resulting usage
+ *
+ * Extract the fence and usage flags from an RCU protected entry in the list.
+ */
+static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index,
+				struct dma_resv *resv, struct dma_fence **fence,
+				enum dma_resv_usage *usage)
+{
+	long tmp;
+
+	tmp = (long)rcu_dereference_check(list->table[index],
+					  resv ? dma_resv_held(resv) : true);
+	*fence = (struct dma_fence *)(tmp & ~DMA_RESV_LIST_MASK);
+	if (usage)
+		*usage = tmp & DMA_RESV_LIST_MASK;
+}
+
+/**
+ * dma_resv_list_set - set fence and usage at a specific index
+ * @list: the list to modify
+ * @index: where to make the change
+ * @fence: the fence to set
+ * @usage: the usage to set
+ *
+ * Set the fence and usage flags at the specific index in the list.
+ */
+static void dma_resv_list_set(struct dma_resv_list *list,
+			      unsigned int index,
+			      struct dma_fence *fence,
+			      enum dma_resv_usage usage)
+{
+	long tmp = ((long)fence) | usage;
+
+	RCU_INIT_POINTER(list->table[index], (struct dma_fence *)tmp);
+}
+
 /**
  * dma_resv_list_alloc - allocate fence list
- * @shared_max: number of fences we need space for
+ * @max_fences: number of fences we need space for
  *
  * Allocate a new dma_resv_list and make sure to correctly initialize
- * shared_max.
+ * max_fences.
  */
-static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max)
+static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences)
 {
 	struct dma_resv_list *list;
 
-	list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL);
+	list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL);
 	if (!list)
 		return NULL;
 
-	list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) /
-		sizeof(*list->shared);
+	list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) /
+		sizeof(*list->table);
 
 	return list;
 }
@@ -97,9 +142,12 @@ static void dma_resv_list_free(struct dma_resv_list *list)
 	if (!list)
 		return;
 
-	for (i = 0; i < list->shared_count; ++i)
-		dma_fence_put(rcu_dereference_protected(list->shared[i], true));
+	for (i = 0; i < list->num_fences; ++i) {
+		struct dma_fence *fence;
 
+		dma_resv_list_entry(list, i, NULL, &fence, NULL);
+		dma_fence_put(fence);
+	}
 	kfree_rcu(list, rcu);
 }
 
@@ -112,8 +160,7 @@ void dma_resv_init(struct dma_resv *obj)
 	ww_mutex_init(&obj->lock, &reservation_ww_class);
 	seqcount_ww_mutex_init(&obj->seq, &obj->lock);
 
-	RCU_INIT_POINTER(obj->fence, NULL);
-	RCU_INIT_POINTER(obj->fence_excl, NULL);
+	RCU_INIT_POINTER(obj->fences, NULL);
 }
 EXPORT_SYMBOL(dma_resv_init);
 
@@ -123,46 +170,31 @@ EXPORT_SYMBOL(dma_resv_init);
  */
 void dma_resv_fini(struct dma_resv *obj)
 {
-	struct dma_resv_list *fobj;
-	struct dma_fence *excl;
-
 	/*
 	 * This object should be dead and all references must have
 	 * been released to it, so no need to be protected with rcu.
 	 */
-	excl = rcu_dereference_protected(obj->fence_excl, 1);
-	if (excl)
-		dma_fence_put(excl);
-
-	fobj = rcu_dereference_protected(obj->fence, 1);
-	dma_resv_list_free(fobj);
+	dma_resv_list_free(rcu_dereference_protected(obj->fences, true));
 	ww_mutex_destroy(&obj->lock);
 }
 EXPORT_SYMBOL(dma_resv_fini);
 
-static inline struct dma_fence *
-dma_resv_excl_fence(struct dma_resv *obj)
+static inline struct dma_resv_list *dma_resv_fences_list(struct dma_resv *obj)
 {
-       return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
-}
-
-static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
-{
-	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
+	return rcu_dereference_check(obj->fences, dma_resv_held(obj));
 }
 
 /**
- * dma_resv_reserve_fences - Reserve space to add shared fences to
- * a dma_resv.
+ * dma_resv_reserve_fences - Reserve space to add fences to a dma_resv object.
  * @obj: reservation object
  * @num_fences: number of fences we want to add
  *
- * Should be called before dma_resv_add_shared_fence().  Must
- * be called with @obj locked through dma_resv_lock().
+ * Should be called before dma_resv_add_fence().  Must be called with @obj
+ * locked through dma_resv_lock().
  *
  * Note that the preallocated slots need to be re-reserved if @obj is unlocked
- * at any time before calling dma_resv_add_shared_fence(). This is validated
- * when CONFIG_DEBUG_MUTEXES is enabled.
+ * at any time before calling dma_resv_add_fence(). This is validated when
+ * CONFIG_DEBUG_MUTEXES is enabled.
  *
  * RETURNS
  * Zero for success, or -errno
@@ -174,11 +206,11 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
 
 	dma_resv_assert_held(obj);
 
-	old = dma_resv_shared_list(obj);
-	if (old && old->shared_max) {
-		if ((old->shared_count + num_fences) <= old->shared_max)
+	old = dma_resv_fences_list(obj);
+	if (old && old->max_fences) {
+		if ((old->num_fences + num_fences) <= old->max_fences)
 			return 0;
-		max = max(old->shared_count + num_fences, old->shared_max * 2);
+		max = max(old->num_fences + num_fences, old->max_fences * 2);
 	} else {
 		max = max(4ul, roundup_pow_of_two(num_fences));
 	}
@@ -193,27 +225,27 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
 	 * references from the old struct are carried over to
 	 * the new.
 	 */
-	for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) {
+	for (i = 0, j = 0, k = max; i < (old ? old->num_fences : 0); ++i) {
+		enum dma_resv_usage usage;
 		struct dma_fence *fence;
 
-		fence = rcu_dereference_protected(old->shared[i],
-						  dma_resv_held(obj));
+		dma_resv_list_entry(old, i, obj, &fence, &usage);
 		if (dma_fence_is_signaled(fence))
-			RCU_INIT_POINTER(new->shared[--k], fence);
+			RCU_INIT_POINTER(new->table[--k], fence);
 		else
-			RCU_INIT_POINTER(new->shared[j++], fence);
+			dma_resv_list_set(new, j++, fence, usage);
 	}
-	new->shared_count = j;
+	new->num_fences = j;
 
 	/*
 	 * We are not changing the effective set of fences here so can
 	 * merely update the pointer to the new array; both existing
 	 * readers and new readers will see exactly the same set of
-	 * active (unsignaled) shared fences. Individual fences and the
+	 * active (unsignaled) fences. Individual fences and the
 	 * old array are protected by RCU and so will not vanish under
 	 * the gaze of the rcu_read_lock() readers.
 	 */
-	rcu_assign_pointer(obj->fence, new);
+	rcu_assign_pointer(obj->fences, new);
 
 	if (!old)
 		return 0;
@@ -222,7 +254,7 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
 	for (i = k; i < max; ++i) {
 		struct dma_fence *fence;
 
-		fence = rcu_dereference_protected(new->shared[i],
+		fence = rcu_dereference_protected(new->table[i],
 						  dma_resv_held(obj));
 		dma_fence_put(fence);
 	}
@@ -234,37 +266,39 @@ EXPORT_SYMBOL(dma_resv_reserve_fences);
 
 #ifdef CONFIG_DEBUG_MUTEXES
 /**
- * dma_resv_reset_shared_max - reset shared fences for debugging
+ * dma_resv_reset_max_fences - reset fences for debugging
  * @obj: the dma_resv object to reset
  *
- * Reset the number of pre-reserved shared slots to test that drivers do
+ * Reset the number of pre-reserved fence slots to test that drivers do
  * correct slot allocation using dma_resv_reserve_fences(). See also
- * &dma_resv_list.shared_max.
+ * &dma_resv_list.max_fences.
  */
-void dma_resv_reset_shared_max(struct dma_resv *obj)
+void dma_resv_reset_max_fences(struct dma_resv *obj)
 {
-	struct dma_resv_list *fences = dma_resv_shared_list(obj);
+	struct dma_resv_list *fences = dma_resv_fences_list(obj);
 
 	dma_resv_assert_held(obj);
 
-	/* Test shared fence slot reservation */
+	/* Test fence slot reservation */
 	if (fences)
-		fences->shared_max = fences->shared_count;
+		fences->max_fences = fences->num_fences;
 }
-EXPORT_SYMBOL(dma_resv_reset_shared_max);
+EXPORT_SYMBOL(dma_resv_reset_max_fences);
 #endif
 
 /**
- * dma_resv_add_shared_fence - Add a fence to a shared slot
+ * dma_resv_add_fence - Add a fence to the dma_resv obj
  * @obj: the reservation object
- * @fence: the shared fence to add
+ * @fence: the fence to add
+ * @usage: how the fence is used, see enum dma_resv_usage
  *
- * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
+ * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
  * dma_resv_reserve_fences() has been called.
  *
  * See also &dma_resv.fence for a discussion of the semantics.
  */
-void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
+void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
+			enum dma_resv_usage usage)
 {
 	struct dma_resv_list *fobj;
 	struct dma_fence *old;
@@ -274,44 +308,45 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
 
 	dma_resv_assert_held(obj);
 
-	/* Drivers should not add containers here, instead add each fence
-	 * individually.
+	/* TODO: Drivers should not add containers here, instead add each fence
+	 * individually. Disabled for now until we have cleaned up amdgpu/ttm.
 	 */
-	WARN_ON(dma_fence_is_container(fence));
+	/* WARN_ON(dma_fence_is_container(fence)); */
 
-	fobj = dma_resv_shared_list(obj);
-	count = fobj->shared_count;
+	fobj = dma_resv_fences_list(obj);
+	count = fobj->num_fences;
 
 	write_seqcount_begin(&obj->seq);
 
 	for (i = 0; i < count; ++i) {
+		enum dma_resv_usage old_usage;
 
-		old = rcu_dereference_protected(fobj->shared[i],
-						dma_resv_held(obj));
-		if (old->context == fence->context ||
+		dma_resv_list_entry(fobj, i, obj, &old, &old_usage);
+		if ((old->context == fence->context && old_usage >= usage) ||
 		    dma_fence_is_signaled(old))
 			goto replace;
 	}
 
-	BUG_ON(fobj->shared_count >= fobj->shared_max);
+	BUG_ON(fobj->num_fences >= fobj->max_fences);
 	old = NULL;
 	count++;
 
 replace:
-	RCU_INIT_POINTER(fobj->shared[i], fence);
-	/* pointer update must be visible before we extend the shared_count */
-	smp_store_mb(fobj->shared_count, count);
+	dma_resv_list_set(fobj, i, fence, usage);
+	/* pointer update must be visible before we extend the num_fences */
+	smp_store_mb(fobj->num_fences, count);
 
 	write_seqcount_end(&obj->seq);
 	dma_fence_put(old);
 }
-EXPORT_SYMBOL(dma_resv_add_shared_fence);
+EXPORT_SYMBOL(dma_resv_add_fence);
 
 /**
  * dma_resv_replace_fences - replace fences in the dma_resv obj
  * @obj: the reservation object
  * @context: the context of the fences to replace
  * @replacement: the new fence to use instead
+ * @usage: how the new fence is used, see enum dma_resv_usage
  *
  * Replace fences with a specified context with a new fence. Only valid if the
  * operation represented by the original fence has no longer access to the
@@ -321,107 +356,72 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence);
  * update fence which makes the resource inaccessible.
  */
 void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
-			     struct dma_fence *replacement)
+			     struct dma_fence *replacement,
+			     enum dma_resv_usage usage)
 {
 	struct dma_resv_list *list;
-	struct dma_fence *old;
 	unsigned int i;
 
 	dma_resv_assert_held(obj);
 
+	list = dma_resv_fences_list(obj);
 	write_seqcount_begin(&obj->seq);
+	for (i = 0; list && i < list->num_fences; ++i) {
+		struct dma_fence *old;
 
-	old = dma_resv_excl_fence(obj);
-	if (old->context == context) {
-		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
-		dma_fence_put(old);
-	}
-
-	list = dma_resv_shared_list(obj);
-	for (i = 0; list && i < list->shared_count; ++i) {
-		old = rcu_dereference_protected(list->shared[i],
-						dma_resv_held(obj));
+		dma_resv_list_entry(list, i, obj, &old, NULL);
 		if (old->context != context)
 			continue;
 
-		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
+		dma_resv_list_set(list, i, replacement, usage);
 		dma_fence_put(old);
 	}
-
 	write_seqcount_end(&obj->seq);
 }
 EXPORT_SYMBOL(dma_resv_replace_fences);
 
 /**
- * dma_resv_add_excl_fence - Add an exclusive fence.
- * @obj: the reservation object
- * @fence: the exclusive fence to add
+ * dma_resv_iter_restart_unlocked - restart the unlocked iterator
+ * @cursor: The dma_resv_iter object to restart
  *
- * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock().
- * See also &dma_resv.fence_excl for a discussion of the semantics.
+ * Restart the unlocked iteration by initializing the cursor object.
  */
-void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
-{
-	struct dma_fence *old_fence = dma_resv_excl_fence(obj);
-
-	dma_resv_assert_held(obj);
-
-	dma_fence_get(fence);
-
-	write_seqcount_begin(&obj->seq);
-	/* write_seqcount_begin provides the necessary memory barrier */
-	RCU_INIT_POINTER(obj->fence_excl, fence);
-	write_seqcount_end(&obj->seq);
-
-	dma_fence_put(old_fence);
-}
-EXPORT_SYMBOL(dma_resv_add_excl_fence);
-
-/* Restart the iterator by initializing all the necessary fields, but not the
- * relation to the dma_resv object. */
 static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
 {
 	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
-	cursor->index = -1;
-	cursor->shared_count = 0;
-	if (cursor->usage >= DMA_RESV_USAGE_READ) {
-		cursor->fences = dma_resv_shared_list(cursor->obj);
-		if (cursor->fences)
-			cursor->shared_count = cursor->fences->shared_count;
-	} else {
-		cursor->fences = NULL;
-	}
+	cursor->index = 0;
+	cursor->num_fences = 0;
+	cursor->fences = dma_resv_fences_list(cursor->obj);
+	if (cursor->fences)
+		cursor->num_fences = cursor->fences->num_fences;
 	cursor->is_restarted = true;
 }
 
 /* Walk to the next not signaled fence and grab a reference to it */
 static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
 {
-	struct dma_resv *obj = cursor->obj;
+	if (!cursor->fences)
+		return;
 
 	do {
 		/* Drop the reference from the previous round */
 		dma_fence_put(cursor->fence);
 
-		if (cursor->index == -1) {
-			cursor->fence = dma_resv_excl_fence(obj);
-			cursor->index++;
-			if (!cursor->fence)
-				continue;
-
-		} else if (!cursor->fences ||
-			   cursor->index >= cursor->shared_count) {
+		if (cursor->index >= cursor->num_fences) {
 			cursor->fence = NULL;
 			break;
 
-		} else {
-			struct dma_resv_list *fences = cursor->fences;
-			unsigned int idx = cursor->index++;
-
-			cursor->fence = rcu_dereference(fences->shared[idx]);
 		}
+
+		dma_resv_list_entry(cursor->fences, cursor->index++,
+				    cursor->obj, &cursor->fence,
+				    &cursor->fence_usage);
 		cursor->fence = dma_fence_get_rcu(cursor->fence);
-		if (!cursor->fence || !dma_fence_is_signaled(cursor->fence))
+		if (!cursor->fence)
+			break;
+
+		if (!dma_fence_is_signaled(cursor->fence) &&
+		    cursor->usage >= cursor->fence_usage)
 			break;
 	} while (true);
 }
@@ -496,15 +496,9 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
 	dma_resv_assert_held(cursor->obj);
 
 	cursor->index = 0;
-	if (cursor->usage >= DMA_RESV_USAGE_READ)
-		cursor->fences = dma_resv_shared_list(cursor->obj);
-	else
-		cursor->fences = NULL;
-
-	fence = dma_resv_excl_fence(cursor->obj);
-	if (!fence)
-		fence = dma_resv_iter_next(cursor);
+	cursor->fences = dma_resv_fences_list(cursor->obj);
 
+	fence = dma_resv_iter_next(cursor);
 	cursor->is_restarted = true;
 	return fence;
 }
@@ -519,17 +513,17 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first);
  */
 struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
 {
-	unsigned int idx;
+	struct dma_fence *fence;
 
 	dma_resv_assert_held(cursor->obj);
 
 	cursor->is_restarted = false;
-	if (!cursor->fences || cursor->index >= cursor->fences->shared_count)
+	if (!cursor->fences || cursor->index >= cursor->fences->num_fences)
 		return NULL;
 
-	idx = cursor->index++;
-	return rcu_dereference_protected(cursor->fences->shared[idx],
-					 dma_resv_held(cursor->obj));
+	dma_resv_list_entry(cursor->fences, cursor->index++,
+			    cursor->obj, &fence, &cursor->fence_usage);
+	return fence;
 }
 EXPORT_SYMBOL_GPL(dma_resv_iter_next);
 
@@ -544,57 +538,43 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
 {
 	struct dma_resv_iter cursor;
 	struct dma_resv_list *list;
-	struct dma_fence *f, *excl;
+	struct dma_fence *f;
 
 	dma_resv_assert_held(dst);
 
 	list = NULL;
-	excl = NULL;
 
 	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
 	dma_resv_for_each_fence_unlocked(&cursor, f) {
 
 		if (dma_resv_iter_is_restarted(&cursor)) {
 			dma_resv_list_free(list);
-			dma_fence_put(excl);
-
-			if (cursor.shared_count) {
-				list = dma_resv_list_alloc(cursor.shared_count);
-				if (!list) {
-					dma_resv_iter_end(&cursor);
-					return -ENOMEM;
-				}
 
-				list->shared_count = 0;
-
-			} else {
-				list = NULL;
+			list = dma_resv_list_alloc(cursor.num_fences);
+			if (!list) {
+				dma_resv_iter_end(&cursor);
+				return -ENOMEM;
 			}
-			excl = NULL;
+			list->num_fences = 0;
 		}
 
 		dma_fence_get(f);
-		if (dma_resv_iter_is_exclusive(&cursor))
-			excl = f;
-		else
-			RCU_INIT_POINTER(list->shared[list->shared_count++], f);
+		dma_resv_list_set(list, list->num_fences++, f,
+				  dma_resv_iter_usage(&cursor));
 	}
 	dma_resv_iter_end(&cursor);
 
 	write_seqcount_begin(&dst->seq);
-	excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst));
-	list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst));
+	list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst));
 	write_seqcount_end(&dst->seq);
 
 	dma_resv_list_free(list);
-	dma_fence_put(excl);
-
 	return 0;
 }
 EXPORT_SYMBOL(dma_resv_copy_fences);
 
 /**
- * dma_resv_get_fences - Get an object's shared and exclusive
+ * dma_resv_get_fences - Get an object's fences
  * fences without update side lock held
  * @obj: the reservation object
  * @usage: controls which fences to include, see enum dma_resv_usage.
@@ -623,7 +603,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
 			while (*num_fences)
 				dma_fence_put((*fences)[--(*num_fences)]);
 
-			count = cursor.shared_count + 1;
+			count = cursor.num_fences + 1;
 
 			/* Eventually re-allocate the array */
 			*fences = krealloc_array(*fences, count,
@@ -695,8 +675,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
 EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
 
 /**
- * dma_resv_wait_timeout - Wait on reservation's objects
- * shared and/or exclusive fences.
+ * dma_resv_wait_timeout - Wait on a reservation object's fences
  * @obj: the reservation object
  * @usage: controls which fences to include, see enum dma_resv_usage.
  * @intr: if true, do interruptible wait
@@ -769,13 +748,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
  */
 void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq)
 {
+	static const char *usage[] = { "kernel", "write", "read", "other" };
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
 	dma_resv_for_each_fence(&cursor, obj, true, fence) {
 		seq_printf(seq, "\t%s fence:",
-			   dma_resv_iter_is_exclusive(&cursor) ?
-				"Exclusive" : "Shared");
+			   usage[dma_resv_iter_usage(&cursor)]);
 		dma_fence_describe(fence, seq);
 	}
 }
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index d097981061b1..d0f7c2bfd4f0 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -58,8 +58,9 @@ static int sanitycheck(void *arg)
 	return r;
 }
 
-static int test_signaling(void *arg, enum dma_resv_usage usage)
+static int test_signaling(void *arg)
 {
+	enum dma_resv_usage usage = (unsigned long)arg;
 	struct dma_resv resv;
 	struct dma_fence *f;
 	int r;
@@ -81,11 +82,7 @@ static int test_signaling(void *arg, enum dma_resv_usage usage)
 		goto err_unlock;
 	}
 
-	if (usage >= DMA_RESV_USAGE_READ)
-		dma_resv_add_shared_fence(&resv, f);
-	else
-		dma_resv_add_excl_fence(&resv, f);
-
+	dma_resv_add_fence(&resv, f, usage);
 	if (dma_resv_test_signaled(&resv, usage)) {
 		pr_err("Resv unexpectedly signaled\n");
 		r = -EINVAL;
@@ -105,18 +102,9 @@ static int test_signaling(void *arg, enum dma_resv_usage usage)
 	return r;
 }
 
-static int test_excl_signaling(void *arg)
-{
-	return test_signaling(arg, DMA_RESV_USAGE_WRITE);
-}
-
-static int test_shared_signaling(void *arg)
-{
-	return test_signaling(arg, DMA_RESV_USAGE_READ);
-}
-
-static int test_for_each(void *arg, enum dma_resv_usage usage)
+static int test_for_each(void *arg)
 {
+	enum dma_resv_usage usage = (unsigned long)arg;
 	struct dma_resv_iter cursor;
 	struct dma_fence *f, *fence;
 	struct dma_resv resv;
@@ -139,10 +127,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
 		goto err_unlock;
 	}
 
-	if (usage >= DMA_RESV_USAGE_READ)
-		dma_resv_add_shared_fence(&resv, f);
-	else
-		dma_resv_add_excl_fence(&resv, f);
+	dma_resv_add_fence(&resv, f, usage);
 
 	r = -ENOENT;
 	dma_resv_for_each_fence(&cursor, &resv, usage, fence) {
@@ -156,8 +141,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
 			r = -EINVAL;
 			goto err_unlock;
 		}
-		if (dma_resv_iter_is_exclusive(&cursor) !=
-		    (usage >= DMA_RESV_USAGE_READ)) {
+		if (dma_resv_iter_usage(&cursor) != usage) {
 			pr_err("Unexpected fence usage\n");
 			r = -EINVAL;
 			goto err_unlock;
@@ -177,18 +161,9 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
 	return r;
 }
 
-static int test_excl_for_each(void *arg)
-{
-	return test_for_each(arg, DMA_RESV_USAGE_WRITE);
-}
-
-static int test_shared_for_each(void *arg)
-{
-	return test_for_each(arg, DMA_RESV_USAGE_READ);
-}
-
-static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
+static int test_for_each_unlocked(void *arg)
 {
+	enum dma_resv_usage usage = (unsigned long)arg;
 	struct dma_resv_iter cursor;
 	struct dma_fence *f, *fence;
 	struct dma_resv resv;
@@ -212,10 +187,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
 		goto err_free;
 	}
 
-	if (usage >= DMA_RESV_USAGE_READ)
-		dma_resv_add_shared_fence(&resv, f);
-	else
-		dma_resv_add_excl_fence(&resv, f);
+	dma_resv_add_fence(&resv, f, usage);
 	dma_resv_unlock(&resv);
 
 	r = -ENOENT;
@@ -235,8 +207,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
 			r = -EINVAL;
 			goto err_iter_end;
 		}
-		if (dma_resv_iter_is_exclusive(&cursor) !=
-		    (usage >= DMA_RESV_USAGE_READ)) {
+		if (dma_resv_iter_usage(&cursor) != usage) {
 			pr_err("Unexpected fence usage\n");
 			r = -EINVAL;
 			goto err_iter_end;
@@ -262,18 +233,9 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
 	return r;
 }
 
-static int test_excl_for_each_unlocked(void *arg)
-{
-	return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
-}
-
-static int test_shared_for_each_unlocked(void *arg)
-{
-	return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
-}
-
-static int test_get_fences(void *arg, enum dma_resv_usage usage)
+static int test_get_fences(void *arg)
 {
+	enum dma_resv_usage usage = (unsigned long)arg;
 	struct dma_fence *f, **fences = NULL;
 	struct dma_resv resv;
 	int r, i;
@@ -296,10 +258,7 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage)
 		goto err_resv;
 	}
 
-	if (usage >= DMA_RESV_USAGE_READ)
-		dma_resv_add_shared_fence(&resv, f);
-	else
-		dma_resv_add_excl_fence(&resv, f);
+	dma_resv_add_fence(&resv, f, usage);
 	dma_resv_unlock(&resv);
 
 	r = dma_resv_get_fences(&resv, usage, &i, &fences);
@@ -324,30 +283,24 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage)
 	return r;
 }
 
-static int test_excl_get_fences(void *arg)
-{
-	return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
-}
-
-static int test_shared_get_fences(void *arg)
-{
-	return test_get_fences(arg, DMA_RESV_USAGE_READ);
-}
-
 int dma_resv(void)
 {
 	static const struct subtest tests[] = {
 		SUBTEST(sanitycheck),
-		SUBTEST(test_excl_signaling),
-		SUBTEST(test_shared_signaling),
-		SUBTEST(test_excl_for_each),
-		SUBTEST(test_shared_for_each),
-		SUBTEST(test_excl_for_each_unlocked),
-		SUBTEST(test_shared_for_each_unlocked),
-		SUBTEST(test_excl_get_fences),
-		SUBTEST(test_shared_get_fences),
+		SUBTEST(test_signaling),
+		SUBTEST(test_for_each),
+		SUBTEST(test_for_each_unlocked),
+		SUBTEST(test_get_fences),
 	};
+	enum dma_resv_usage usage;
+	int r;
 
 	spin_lock_init(&fence_lock);
-	return subtests(tests, NULL);
+	for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ;
+	     ++usage) {
+		r = subtests(tests, (void *)(unsigned long)usage);
+		if (r)
+			return r;
+	}
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7de8f67f7dde..ab5d6b630a49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 	 */
 	replacement = dma_fence_get_stub();
 	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
-				replacement);
+				replacement, DMA_RESV_USAGE_READ);
 	dma_fence_put(replacement);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2bf909a4242a..1c039db976a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -54,8 +54,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &bo->tbo;
-	/* One for TTM and one for the CS job */
-	p->uf_entry.tv.num_shared = 2;
+	/* One for TTM and two for the CS job */
+	p->uf_entry.tv.num_shared = 3;
 
 	drm_gem_object_put(gobj);
 
@@ -1284,7 +1284,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			break;
 		}
 		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
-		rcu_assign_pointer(resv->fence_excl, &chain->base);
+		dma_resv_add_fence(resv, &chain->base, DMA_RESV_USAGE_WRITE);
 		e->chain = NULL;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 0a843cc54945..9085a6b1ad56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1393,10 +1393,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		return;
 	}
 
-	if (shared)
-		dma_resv_add_shared_fence(resv, fence);
-	else
-		dma_resv_add_excl_fence(resv, fence);
+	dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
+			   DMA_RESV_USAGE_WRITE);
 }
 
 /**
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index d7cd26dfaf8a..0cc036d93afc 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -203,14 +203,10 @@ static void submit_attach_object_fences(struct etnaviv_gem_submit *submit)
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct drm_gem_object *obj = &submit->bos[i].obj->base;
+		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
 
-		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
-			dma_resv_add_excl_fence(obj->resv,
-							  submit->out_fence);
-		else
-			dma_resv_add_shared_fence(obj->resv,
-							    submit->out_fence);
-
+		dma_resv_add_fence(obj->resv, submit->out_fence, write ?
+				   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
 		submit_unlock_object(submit, i);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 14a1c0ad8c3c..e7ae94ee1b44 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -148,12 +148,13 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 		if (dma_resv_iter_is_restarted(&cursor))
 			args->busy = 0;
 
-		if (dma_resv_iter_is_exclusive(&cursor))
-			/* Translate the exclusive fence to the READ *and* WRITE engine */
-			args->busy |= busy_check_writer(fence);
-		else
-			/* Translate shared fences to READ set of engines */
-			args->busy |= busy_check_reader(fence);
+		/* Translate read fences to READ set of engines */
+		args->busy |= busy_check_reader(fence);
+	}
+	dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_WRITE);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		/* Translate the write fences to the READ *and* WRITE engine */
+		args->busy |= busy_check_writer(fence);
 	}
 	dma_resv_iter_end(&cursor);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8a2223eb0ba9..887cb6b71ae4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -114,7 +114,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 						obj->base.resv, NULL, true,
 						i915_fence_timeout(i915),
 						I915_FENCE_GFP);
-		dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
+		dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
+				   DMA_RESV_USAGE_WRITE);
 		dma_fence_work_commit(&clflush->base);
 		/*
 		 * We must have successfully populated the pages(since we are
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4de6500f3c55..e4a232e22f9d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -622,9 +622,8 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
 	if (IS_ERR_OR_NULL(copy_fence))
 		return PTR_ERR_OR_ZERO(copy_fence);
 
-	dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
-	dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
-
+	dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
+	dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ);
 	dma_fence_put(copy_fence);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fe9f89289418..52fd6705a518 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1640,7 +1640,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 		}
 
 		if (fence) {
-			dma_resv_add_excl_fence(vma->obj->base.resv, fence);
+			dma_resv_add_fence(vma->obj->base.resv, fence,
+					   DMA_RESV_USAGE_WRITE);
 			obj->write_domain = I915_GEM_DOMAIN_RENDER;
 			obj->read_domains = 0;
 		}
@@ -1652,7 +1653,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 		}
 
 		if (fence) {
-			dma_resv_add_shared_fence(vma->obj->base.resv, fence);
+			dma_resv_add_fence(vma->obj->base.resv, fence,
+					   DMA_RESV_USAGE_READ);
 			obj->write_domain = 0;
 		}
 	}
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
index 9435a3ca71c8..38caa7f78871 100644
--- a/drivers/gpu/drm/lima/lima_gem.c
+++ b/drivers/gpu/drm/lima/lima_gem.c
@@ -366,7 +366,7 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
 		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
 			dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence);
 		else
-			dma_resv_add_shared_fence(lima_bo_resv(bos[i]), fence);
+			dma_resv_add_fence(lima_bo_resv(bos[i]), fence, DMA_RESV_USAGE_READ);
 	}
 
 	drm_gem_unlock_reservations((struct drm_gem_object **)bos,
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 993dbcd7a586..2786913be00a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -397,7 +397,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
 		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
 			dma_resv_add_excl_fence(obj->resv, submit->user_fence);
 		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
-			dma_resv_add_shared_fence(obj->resv, submit->user_fence);
+			dma_resv_add_fence(obj->resv, submit->user_fence, DMA_RESV_USAGE_READ);
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c6bb4dbcd735..05076e530e7d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1308,10 +1308,11 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool excl
 {
 	struct dma_resv *resv = nvbo->bo.base.resv;
 
-	if (exclusive)
-		dma_resv_add_excl_fence(resv, &fence->base);
-	else if (fence)
-		dma_resv_add_shared_fence(resv, &fence->base);
+	if (!fence)
+		return;
+
+	dma_resv_add_fence(resv, &fence->base, exclusive ?
+			   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
 }
 
 static void
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index d5e81ccee01c..7f01dcf81fab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -360,9 +360,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
 		dma_resv_for_each_fence(&cursor, resv,
 					dma_resv_usage_rw(exclusive),
 					fence) {
+			enum dma_resv_usage usage;
 			struct nouveau_fence *f;
 
-			if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
+			usage = dma_resv_iter_usage(&cursor);
+			if (i == 0 && usage == DMA_RESV_USAGE_WRITE)
 				continue;
 
 			f = nouveau_local_fence(fence, chan->drm);
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index cde1e8ddaeaa..368d26da0d6a 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -429,7 +429,8 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
 	list_for_each_entry(entry, &release->bos, head) {
 		bo = entry->bo;
 
-		dma_resv_add_shared_fence(bo->base.resv, &release->base);
+		dma_resv_add_fence(bo->base.resv, &release->base,
+				   DMA_RESV_USAGE_READ);
 		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index afca4bf59a8d..382121c26f81 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -792,8 +792,6 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
 		return;
 	}
 
-	if (shared)
-		dma_resv_add_shared_fence(resv, &fence->base);
-	else
-		dma_resv_add_excl_fence(resv, &fence->base);
+	dma_resv_add_fence(resv, &fence->base, shared ?
+			   DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d4b2695606e2..6014c363d6e6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 		return ret;
 	}
 
-	dma_resv_add_shared_fence(bo->base.resv, fence);
+	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
 
 	ret = dma_resv_reserve_fences(bo->base.resv, 1);
 	if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 862d2f22412a..49689c7c8078 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -508,7 +508,8 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
 	if (ret)
 		return ret;
 
-	dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
+	dma_resv_add_fence(&ghost_obj->base._resv, fence,
+			   DMA_RESV_USAGE_WRITE);
 
 	/**
 	 * If we're not moving to fixed memory, the TTM object
@@ -562,7 +563,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type);
 	int ret = 0;
 
-	dma_resv_add_excl_fence(bo->base.resv, fence);
+	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
 	if (!evict)
 		ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt);
 	else if (!from->use_tt && pipeline)
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 789c645f004e..0eb995d25df1 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -154,10 +154,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		if (entry->num_shared)
-			dma_resv_add_shared_fence(bo->base.resv, fence);
-		else
-			dma_resv_add_excl_fence(bo->base.resv, fence);
+		dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ?
+				   DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
 		ttm_bo_move_to_lru_tail_unlocked(bo);
 		dma_resv_unlock(bo->base.resv);
 	}
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 961812d33827..2352e9640922 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -550,8 +550,8 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
 
 	for (i = 0; i < job->bo_count; i++) {
 		/* XXX: Use shared fences for read-only objects. */
-		dma_resv_add_excl_fence(job->bo[i]->resv,
-					job->done_fence);
+		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
+				   DMA_RESV_USAGE_WRITE);
 	}
 
 	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 594bd6bb00d2..38550317e025 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -546,7 +546,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 		bo = to_vc4_bo(&exec->bo[i]->base);
 		bo->seqno = seqno;
 
-		dma_resv_add_shared_fence(bo->base.base.resv, exec->fence);
+		dma_resv_add_fence(bo->base.base.resv, exec->fence, DMA_RESV_USAGE_READ);
 	}
 
 	list_for_each_entry(bo, &exec->unref_list, unref_head) {
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
index 91fc4940c65a..c2a879734d40 100644
--- a/drivers/gpu/drm/vgem/vgem_fence.c
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -161,12 +161,9 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
 	/* Expose the fence via the dma-buf */
 	dma_resv_lock(resv, NULL);
 	ret = dma_resv_reserve_fences(resv, 1);
-	if (!ret) {
-		if (arg->flags & VGEM_FENCE_WRITE)
-			dma_resv_add_excl_fence(resv, fence);
-		else
-			dma_resv_add_shared_fence(resv, fence);
-	}
+	if (!ret)
+		dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ?
+				   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
 	dma_resv_unlock(resv);
 
 	/* Record the fence in our idr for later signaling */
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
index 1820ca6cf673..580a78809836 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -250,7 +250,8 @@ void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs,
 	int i;
 
 	for (i = 0; i < objs->nents; i++)
-		dma_resv_add_excl_fence(objs->objs[i]->resv, fence);
+		dma_resv_add_fence(objs->objs[i]->resv, fence,
+				   DMA_RESV_USAGE_WRITE);
 }
 
 void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index b96884f7d03d..bec50223efe5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -758,7 +758,8 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
 
 	ret = dma_resv_reserve_fences(bo->base.resv, 1);
 	if (!ret)
-		dma_resv_add_excl_fence(bo->base.resv, &fence->base);
+		dma_resv_add_fence(bo->base.resv, &fence->base,
+				   DMA_RESV_USAGE_WRITE);
 	else
 		/* Last resort fallback when we are OOM */
 		dma_fence_wait(&fence->base, false);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 74083e62e19d..a8cfc1705d6a 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -393,16 +393,13 @@ struct dma_buf {
 	 * e.g. exposed in `Implicit Fence Poll Support`_ must follow the
 	 * below rules.
 	 *
-	 * - Drivers must add a shared fence through dma_resv_add_shared_fence()
-	 *   for anything the userspace API considers a read access. This highly
-	 *   depends upon the API and window system.
+	 * - Drivers must add a read fence through dma_resv_add_fence() with the
+	 *   DMA_RESV_USAGE_READ flag for anything the userspace API considers a
+	 *   read access. This highly depends upon the API and window system.
 	 *
-	 * - Similarly drivers must set the exclusive fence through
-	 *   dma_resv_add_excl_fence() for anything the userspace API considers
-	 *   write access.
-	 *
-	 * - Drivers may just always set the exclusive fence, since that only
-	 *   causes unecessarily synchronization, but no correctness issues.
+	 * - Similarly drivers must add a write fence through
+	 *   dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for
+	 *   anything the userspace API considers write access.
 	 *
 	 * - Some drivers only expose a synchronous userspace API with no
 	 *   pipelining across drivers. These do not set any fences for their
@@ -413,7 +410,7 @@ struct dma_buf {
 	 * Dynamic importers, see dma_buf_attachment_is_dynamic(), have
 	 * additional constraints on how they set up fences:
 	 *
-	 * - Dynamic importers must obey the exclusive fence and wait for it to
+	 * - Dynamic importers must obey the kernel fences and wait for them to
 	 *   signal before allowing access to the buffer's underlying storage
 	 *   through the device.
 	 *
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 658674c4b7b9..ae0436d7e7b8 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -95,8 +95,8 @@ static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
 /**
  * struct dma_resv - a reservation object manages fences for a buffer
  *
- * There are multiple uses for this, with sometimes slightly different rules in
- * how the fence slots are used.
+ * This is a container for dma_fence objects which needs to handle multiple use
+ * cases.
  *
  * One use is to synchronize cross-driver access to a struct dma_buf, either for
  * dynamic buffer management or just to handle implicit synchronization between
@@ -126,47 +126,22 @@ struct dma_resv {
 	 * @seq:
 	 *
 	 * Sequence count for managing RCU read-side synchronization, allows
-	 * read-only access to @fence_excl and @fence while ensuring we take a
-	 * consistent snapshot.
+	 * read-only access to @fences while ensuring we take a consistent
+	 * snapshot.
 	 */
 	seqcount_ww_mutex_t seq;
 
 	/**
-	 * @fence_excl:
+	 * @fences:
 	 *
-	 * The exclusive fence, if there is one currently.
+	 * Array of fences which were added to the dma_resv object
 	 *
-	 * To guarantee that no fences are lost, this new fence must signal
-	 * only after the previous exclusive fence has signalled. If
-	 * semantically only a new access is added without actually treating the
-	 * previous one as a dependency the exclusive fences can be strung
-	 * together using struct dma_fence_chain.
-	 *
-	 * Note that actual semantics of what an exclusive or shared fence mean
-	 * is defined by the user, for reservation objects shared across drivers
-	 * see &dma_buf.resv.
-	 */
-	struct dma_fence __rcu *fence_excl;
-
-	/**
-	 * @fence:
-	 *
-	 * List of current shared fences.
-	 *
-	 * There are no ordering constraints of shared fences against the
-	 * exclusive fence slot. If a waiter needs to wait for all access, it
-	 * has to wait for both sets of fences to signal.
-	 *
-	 * A new fence is added by calling dma_resv_add_shared_fence(). Since
-	 * this often needs to be done past the point of no return in command
+	 * A new fence is added by calling dma_resv_add_fence(). Since this
+	 * often needs to be done past the point of no return in command
 	 * submission it cannot fail, and therefore sufficient slots need to be
 	 * reserved by calling dma_resv_reserve_fences().
-	 *
-	 * Note that actual semantics of what an exclusive or shared fence mean
-	 * is defined by the user, for reservation objects shared across drivers
-	 * see &dma_buf.resv.
 	 */
-	struct dma_resv_list __rcu *fence;
+	struct dma_resv_list __rcu *fences;
 };
 
 /**
@@ -191,6 +166,9 @@ struct dma_resv_iter {
 	/** @fence: the currently handled fence */
 	struct dma_fence *fence;
 
+	/** @fence_usage: the usage of the current fence */
+	enum dma_resv_usage fence_usage;
+
 	/** @seq: sequence number to check for modifications */
 	unsigned int seq;
 
@@ -200,8 +178,8 @@ struct dma_resv_iter {
 	/** @fences: the shared fences; private, *MUST* not dereference  */
 	struct dma_resv_list *fences;
 
-	/** @shared_count: number of shared fences */
-	unsigned int shared_count;
+	/** @num_fences: number of fences */
+	unsigned int num_fences;
 
 	/** @is_restarted: true if this is the first returned fence */
 	bool is_restarted;
@@ -240,14 +218,15 @@ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
 }
 
 /**
- * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
+ * dma_resv_iter_usage - Return the usage of the current fence
  * @cursor: the cursor of the current position
  *
- * Returns true if the currently returned fence is the exclusive one.
+ * Returns the usage of the currently processed fence.
  */
-static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
+static inline enum dma_resv_usage
+dma_resv_iter_usage(struct dma_resv_iter *cursor)
 {
-	return cursor->index == 0;
+	return cursor->fence_usage;
 }
 
 /**
@@ -302,9 +281,9 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
 
 #ifdef CONFIG_DEBUG_MUTEXES
-void dma_resv_reset_shared_max(struct dma_resv *obj);
+void dma_resv_reset_max_fences(struct dma_resv *obj);
 #else
-static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {}
+static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {}
 #endif
 
 /**
@@ -450,17 +429,18 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj)
  */
 static inline void dma_resv_unlock(struct dma_resv *obj)
 {
-	dma_resv_reset_shared_max(obj);
+	dma_resv_reset_max_fences(obj);
 	ww_mutex_unlock(&obj->lock);
 }
 
 void dma_resv_init(struct dma_resv *obj);
 void dma_resv_fini(struct dma_resv *obj);
 int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences);
-void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
+void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
+			enum dma_resv_usage usage);
 void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
-			     struct dma_fence *fence);
-void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
+			     struct dma_fence *fence,
+			     enum dma_resv_usage usage);
 int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
 			unsigned int *num_fences, struct dma_fence ***fences);
 int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
-- 
2.25.1
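
Not part of the patch, just a sketch of how the reworked interface above is meant to be used; the helper name and the pr_info() logging are made up for illustration:

#include <linux/dma-resv.h>
#include <linux/dma-fence.h>
#include <linux/printk.h>

/* Sketch only: add one fence with an explicit usage, then walk all
 * fences and print what dma_resv_iter_usage() now reports for each.
 */
static int example_add_and_dump(struct dma_resv *resv, struct dma_fence *f,
				bool write)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret;

	ret = dma_resv_lock(resv, NULL);
	if (ret)
		return ret;

	ret = dma_resv_reserve_fences(resv, 1);
	if (!ret)
		dma_resv_add_fence(resv, f, write ? DMA_RESV_USAGE_WRITE :
				   DMA_RESV_USAGE_READ);

	/* asking for READ returns the WRITE fences as well */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence)
		pr_info("fence %llu: usage %d\n", fence->context,
			dma_resv_iter_usage(&cursor));

	dma_resv_unlock(resv);
	return ret;
}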



* [PATCH 18/23] drm/amdgpu: remove dma_resv workaround
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (15 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 15:47   ` Daniel Vetter
  2022-03-21 13:58 ` [PATCH 19/23] dma-buf: add DMA_RESV_USAGE_KERNEL v2 Christian König
                   ` (7 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König, amd-gfx

We can now add multiple writers to the dma_resv object.

Also re-enable the check in dma_resv.c that containers are not added.
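
A rough sketch of what a submission path can do after this change; "resv" and "job_fence" are assumed to come from the caller, this is not the amdgpu code below:

#include <linux/dma-resv.h>

static int example_add_writer(struct dma_resv *resv,
			      struct dma_fence *job_fence)
{
	int ret;

	dma_resv_assert_held(resv);

	ret = dma_resv_reserve_fences(resv, 1);
	if (ret)
		return ret;

	/* no dma_fence_chain wrapper needed any more, every writer is
	 * simply added with WRITE usage
	 */
	dma_resv_add_fence(resv, job_fence, DMA_RESV_USAGE_WRITE);
	return 0;
}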

Signed-off-by: Christian König <christian.koenig@amd.com>
Cc: amd-gfx@lists.freedesktop.org
---
 drivers/dma-buf/dma-resv.c                  |  6 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 ++-------------------
 3 files changed, 8 insertions(+), 50 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 26257ba1527e..10d70812373c 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -308,10 +308,10 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
 
 	dma_resv_assert_held(obj);
 
-	/* TODO: Drivers should not add containers here, instead add each fence
-	 * individually. Disabled for now until we cleaned up amdgpu/ttm.
+	/* Drivers should not add containers here, instead add each fence
+	 * individually.
 	 */
-	/* WARN_ON(dma_fence_is_container(fence)); */
+	WARN_ON(dma_fence_is_container(fence));
 
 	fobj = dma_resv_fences_list(obj);
 	count = fobj->num_fences;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 044b41f0bfd9..529d52a204cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,7 +34,6 @@ struct amdgpu_fpriv;
 struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
-	struct dma_fence_chain		*chain;
 	uint32_t			priority;
 	struct page			**user_pages;
 	bool				user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c039db976a9..88009833f523 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -575,14 +575,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		e->bo_va = amdgpu_vm_bo_find(vm, bo);
-
-		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
-			e->chain = dma_fence_chain_alloc();
-			if (!e->chain) {
-				r = -ENOMEM;
-				goto error_validate;
-			}
-		}
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -633,13 +625,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 error_validate:
-	if (r) {
-		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-			dma_fence_chain_free(e->chain);
-			e->chain = NULL;
-		}
+	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-	}
 out:
 	return r;
 }
@@ -679,17 +666,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 {
 	unsigned i;
 
-	if (error && backoff) {
-		struct amdgpu_bo_list_entry *e;
-
-		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
-			dma_fence_chain_free(e->chain);
-			e->chain = NULL;
-		}
-
+	if (error && backoff)
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
-	}
 
 	for (i = 0; i < parser->num_post_deps; i++) {
 		drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1264,29 +1243,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct dma_resv *resv = e->tv.bo->base.resv;
-		struct dma_fence_chain *chain = e->chain;
-		struct dma_resv_iter cursor;
-		struct dma_fence *fence;
-
-		if (!chain)
-			continue;
-
-		/*
-		 * Work around dma_resv shortcommings by wrapping up the
-		 * submission in a dma_fence_chain and add it as exclusive
-		 * fence.
-		 */
-		dma_resv_for_each_fence(&cursor, resv,
-					DMA_RESV_USAGE_WRITE,
-					fence) {
-			break;
-		}
-		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
-		dma_resv_add_fence(resv, &chain->base, DMA_RESV_USAGE_WRITE);
-		e->chain = NULL;
-	}
+	/* Make sure all BOs are remembered as writers */
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->tv.num_shared = 0;
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	mutex_unlock(&p->adev->notifier_lock);
-- 
2.25.1



* [PATCH 19/23] dma-buf: add DMA_RESV_USAGE_KERNEL v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (16 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 18/23] drm/amdgpu: remove dma_resv workaround Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 15:53   ` Daniel Vetter
  2022-03-21 13:58 ` [PATCH 20/23] dma-buf: add DMA_RESV_USAGE_BOOKKEEP v2 Christian König
                   ` (6 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Add a usage for kernel submissions. Waiting for those
is mandatory for dynamic DMA-bufs.

v2: use "must" in documentation, fix whitespaces
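
A minimal sketch of the intended flow, assuming a fence slot has already been reserved with dma_resv_reserve_fences(); the helper names are made up for illustration:

#include <linux/dma-resv.h>
#include <linux/sched.h>

/* memory management publishes its copy/clear fence with KERNEL usage */
static void example_publish_move(struct dma_resv *resv,
				 struct dma_fence *copy_fence)
{
	dma_resv_assert_held(resv);
	dma_resv_add_fence(resv, copy_fence, DMA_RESV_USAGE_KERNEL);
}

/* importers wait for it before touching the backing storage; WRITE and
 * READ queries return the KERNEL fences as well because of the ordering
 */
static long example_wait_for_kernel_work(struct dma_resv *resv)
{
	return dma_resv_wait_timeout(resv, DMA_RESV_USAGE_KERNEL, false,
				     MAX_SCHEDULE_TIMEOUT);
}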

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/st-dma-resv.c                |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c      |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  6 ++++--
 drivers/gpu/drm/i915/gem/i915_gem_clflush.c  |  2 +-
 drivers/gpu/drm/nouveau/nouveau_bo.c         |  4 ++--
 drivers/gpu/drm/radeon/radeon_uvd.c          |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c                 |  2 +-
 drivers/gpu/drm/ttm/ttm_bo_util.c            |  4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_bo.c           |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c     |  2 +-
 drivers/infiniband/core/umem_dmabuf.c        |  2 +-
 include/linux/dma-resv.h                     | 22 ++++++++++++++++++++
 13 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index d0f7c2bfd4f0..062b57d63fa6 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -296,7 +296,7 @@ int dma_resv(void)
 	int r;
 
 	spin_lock_init(&fence_lock);
-	for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ;
+	for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ;
 	     ++usage) {
 		r = subtests(tests, (void *)(unsigned long)usage);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9085a6b1ad56..1618b6847c69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -765,7 +765,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
 		return 0;
 	}
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
 				  false, MAX_SCHEDULE_TIMEOUT);
 	if (r < 0)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 33deb0df62fd..9e102080dad9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1163,7 +1163,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 	if (direct) {
 		r = dma_resv_wait_timeout(bo->tbo.base.resv,
-					  DMA_RESV_USAGE_WRITE, false,
+					  DMA_RESV_USAGE_KERNEL, false,
 					  msecs_to_jiffies(10));
 		if (r == 0)
 			r = -ETIMEDOUT;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 0cc036d93afc..ab5249d55b32 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -185,9 +185,11 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
 			return ret;
 
 		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
-			continue;
+			usage = DMA_RESV_USAGE_KERNEL;
+		else
+			usage = dma_resv_usage_rw(bo->flags &
+						  ETNA_SUBMIT_BO_WRITE);
 
-		usage = dma_resv_usage_rw(bo->flags & ETNA_SUBMIT_BO_WRITE);
 		ret = dma_resv_get_fences(robj, usage, &bo->nr_shared,
 					  &bo->shared);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 887cb6b71ae4..9a7104251cb3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -115,7 +115,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 						i915_fence_timeout(i915),
 						I915_FENCE_GFP);
 		dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
-				   DMA_RESV_USAGE_WRITE);
+				   DMA_RESV_USAGE_KERNEL);
 		dma_fence_work_commit(&clflush->base);
 		/*
 		 * We must have successfully populated the pages(since we are
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 05076e530e7d..13deb6c70ba6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -962,10 +962,10 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 	struct dma_fence *fence;
 	int ret;
 
-	ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE,
+	ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_KERNEL,
 				     &fence);
 	if (ret)
-		dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE,
+		dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL,
 				      false, MAX_SCHEDULE_TIMEOUT);
 
 	nv10_bo_put_tile_region(dev, *old_tile, fence);
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 4000ad2f39ba..488e78889dd6 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -478,7 +478,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 		return -EINVAL;
 	}
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
 				  false, MAX_SCHEDULE_TIMEOUT);
 	if (r <= 0) {
 		DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 6014c363d6e6..0f0aa96fb051 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 		return ret;
 	}
 
-	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
+	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
 
 	ret = dma_resv_reserve_fences(bo->base.resv, 1);
 	if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 49689c7c8078..98e1c804519e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -509,7 +509,7 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
 		return ret;
 
 	dma_resv_add_fence(&ghost_obj->base._resv, fence,
-			   DMA_RESV_USAGE_WRITE);
+			   DMA_RESV_USAGE_KERNEL);
 
 	/**
 	 * If we're not moving to fixed memory, the TTM object
@@ -563,7 +563,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type);
 	int ret = 0;
 
-	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
+	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
 	if (!evict)
 		ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt);
 	else if (!from->use_tt && pipeline)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index bec50223efe5..408ede1f967f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -759,7 +759,7 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
 	ret = dma_resv_reserve_fences(bo->base.resv, 1);
 	if (!ret)
 		dma_resv_add_fence(bo->base.resv, &fence->base,
-				   DMA_RESV_USAGE_WRITE);
+				   DMA_RESV_USAGE_KERNEL);
 	else
 		/* Last resort fallback when we are OOM */
 		dma_fence_wait(&fence->base, false);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 39081dbf9ac8..f999fdd927df 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1167,7 +1167,7 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
 			dma_fence_put(bo->moving);
 
 		return dma_resv_get_singleton(bo->base.resv,
-					      DMA_RESV_USAGE_WRITE,
+					      DMA_RESV_USAGE_KERNEL,
 					      &bo->moving);
 	}
 
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
index f9901d273b8e..fce80a4a5147 100644
--- a/drivers/infiniband/core/umem_dmabuf.c
+++ b/drivers/infiniband/core/umem_dmabuf.c
@@ -68,7 +68,7 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
 	 * the migration.
 	 */
 	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
-				     DMA_RESV_USAGE_WRITE,
+				     DMA_RESV_USAGE_KERNEL,
 				     false, MAX_SCHEDULE_TIMEOUT);
 }
 EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index ae0436d7e7b8..8f4f406a5d02 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -54,8 +54,30 @@ struct dma_resv_list;
  *
  * This enum describes the different use cases for a dma_resv object and
  * controls which fences are returned when queried.
+ *
+ * An important fact is that there is the order KERNEL<WRITE<READ and
+ * when the dma_resv object is asked for fences for one use case the fences
+ * for the lower use case are returned as well.
+ *
+ * For example, when asking for WRITE fences the KERNEL fences are returned
+ * as well. Similarly, when asked for READ fences both WRITE and KERNEL
+ * fences are returned as well.
  */
 enum dma_resv_usage {
+	/**
+	 * @DMA_RESV_USAGE_KERNEL: For in kernel memory management only.
+	 *
+	 * This should only be used for things like copying or clearing memory
+	 * with a DMA hardware engine for the purpose of kernel memory
+	 * management.
+	 *
+	 * Drivers must *always* wait for those fences before accessing the
+	 * resource protected by the dma_resv object. The only exception is
+	 * when the resource is known to be locked down in place by having
+	 * been pinned beforehand.
+	 */
+	DMA_RESV_USAGE_KERNEL,
+
 	/**
 	 * @DMA_RESV_USAGE_WRITE: Implicit write synchronization.
 	 *
-- 
2.25.1



* [PATCH 20/23] dma-buf: add DMA_RESV_USAGE_BOOKKEEP v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (17 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 19/23] dma-buf: add DMA_RESV_USAGE_KERNEL v2 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 16:06   ` Daniel Vetter
  2022-03-21 13:58 ` [PATCH 21/23] dma-buf: wait for map to complete for static attachments Christian König
                   ` (5 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

Add a usage for submissions which are independent of implicit sync but
still relevant for memory management.

v2: cleanup the kerneldoc a bit
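
A small sketch of the intended split, with made-up helper names; a page table update fence stays invisible to implicit sync while memory management still sees it:

#include <linux/dma-resv.h>

/* invisible to implicit sync: WRITE/READ queries will not return it */
static void example_add_pt_update(struct dma_resv *resv,
				  struct dma_fence *pt_fence)
{
	dma_resv_assert_held(resv);
	dma_resv_add_fence(resv, pt_fence, DMA_RESV_USAGE_BOOKKEEP);
}

/* memory management asks for BOOKKEEP and therefore sees every fence */
static bool example_bo_is_idle(struct dma_resv *resv)
{
	return dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP);
}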

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c                       |  2 +-
 drivers/dma-buf/st-dma-resv.c                    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c          |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c         |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           |  6 +++---
 drivers/gpu/drm/i915/gem/i915_gem_lmem.c         |  2 +-
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  2 +-
 drivers/gpu/drm/qxl/qxl_debugfs.c                |  2 +-
 drivers/gpu/drm/radeon/radeon_gem.c              |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c               |  2 +-
 drivers/gpu/drm/ttm/ttm_bo.c                     | 14 +++++++-------
 include/linux/dma-resv.h                         | 13 ++++++++++++-
 15 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 10d70812373c..e05be4b52221 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -544,7 +544,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
 
 	list = NULL;
 
-	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
+	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_BOOKKEEP);
 	dma_resv_for_each_fence_unlocked(&cursor, f) {
 
 		if (dma_resv_iter_is_restarted(&cursor)) {
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
index 062b57d63fa6..8ace9e84c845 100644
--- a/drivers/dma-buf/st-dma-resv.c
+++ b/drivers/dma-buf/st-dma-resv.c
@@ -296,7 +296,7 @@ int dma_resv(void)
 	int r;
 
 	spin_lock_init(&fence_lock);
-	for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ;
+	for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_BOOKKEEP;
 	     ++usage) {
 		r = subtests(tests, (void *)(unsigned long)usage);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ab5d6b630a49..b461c3aab877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 	 */
 	replacement = dma_fence_get_stub();
 	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
-				replacement, DMA_RESV_USAGE_READ);
+				replacement, DMA_RESV_USAGE_BOOKKEEP);
 	dma_fence_put(replacement);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 490d2a7a3e2b..ddf46802b1ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 	struct dma_fence *fence;
 	int r;
 
-	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence);
+	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
 	if (r)
 		goto fallback;
 
@@ -139,7 +139,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 	/* Not enough memory for the delayed delete, as last resort
 	 * block for all the fences to complete.
 	 */
-	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
+	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
 			      false, MAX_SCHEDULE_TIMEOUT);
 	amdgpu_pasid_free(pasid);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 86f5248676b0..b86c0b8252a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -75,7 +75,7 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
 
 	mmu_interval_set_seq(mni, cur_seq);
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
 				  false, MAX_SCHEDULE_TIMEOUT);
 	mutex_unlock(&adev->notifier_lock);
 	if (r <= 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 744e144e5fc2..11c46b3e4c60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -260,7 +260,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		return -EINVAL;
 
 	/* TODO: Use DMA_RESV_USAGE_READ here */
-	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
+	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
 		dma_fence_chain_for_each(f, f) {
 			struct dma_fence *tmp = dma_fence_chain_contained(f);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 9ffd8c4c34a0..63d8569ebef3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1357,7 +1357,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 	 * be resident to run successfully
 	 */
 	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
-				DMA_RESV_USAGE_READ, f) {
+				DMA_RESV_USAGE_BOOKKEEP, f) {
 		if (amdkfd_fence_check_mm(f, current->mm))
 			return false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f3235aad7282..b4f0679f8797 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2105,7 +2105,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
+	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
 		/* Add a callback for each fence in the reservation object */
 		amdgpu_vm_prt_get(adev);
 		amdgpu_vm_add_prt_cb(adev, fence);
@@ -2707,7 +2707,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return true;
 
 	/* Don't evict VM page tables while they are busy */
-	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ))
+	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
 		return false;
 
 	/* Try to block ongoing updates */
@@ -2888,7 +2888,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 {
 	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
-					DMA_RESV_USAGE_READ,
+					DMA_RESV_USAGE_BOOKKEEP,
 					true, timeout);
 	if (timeout <= 0)
 		return timeout;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
index a200d3e66573..4115a222a853 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
 	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
 
 #ifdef CONFIG_LOCKDEP
-	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) &&
+	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) &&
 		    i915_gem_object_evictable(obj));
 #endif
 	return mr && (mr->type == INTEL_MEMORY_LOCAL ||
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 60feff9160de..1ddd81623691 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -85,7 +85,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
 		return true;
 
 	/* we will unbind on next submission, still have userptr pins */
-	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false,
+	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
 				  MAX_SCHEDULE_TIMEOUT);
 	if (r <= 0)
 		drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
index 33e5889d6608..2d9ed3b94574 100644
--- a/drivers/gpu/drm/qxl/qxl_debugfs.c
+++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
@@ -62,7 +62,7 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
 		int rel = 0;
 
 		dma_resv_iter_begin(&cursor, bo->tbo.base.resv,
-				    DMA_RESV_USAGE_READ);
+				    DMA_RESV_USAGE_BOOKKEEP);
 		dma_resv_for_each_fence_unlocked(&cursor, fence) {
 			if (dma_resv_iter_is_restarted(&cursor))
 				rel = 0;
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 71bf9299e45c..9587ab88bedd 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -162,7 +162,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
 	if (domain == RADEON_GEM_DOMAIN_CPU) {
 		/* Asking for cpu access wait for object idle */
 		r = dma_resv_wait_timeout(robj->tbo.base.resv,
-					  DMA_RESV_USAGE_READ,
+					  DMA_RESV_USAGE_BOOKKEEP,
 					  true, 30 * HZ);
 		if (!r)
 			r = -EBUSY;
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index 68ebeb1bdfff..29fe8423bd90 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -66,7 +66,7 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
 		return true;
 	}
 
-	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
+	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
 				  false, MAX_SCHEDULE_TIMEOUT);
 	if (r <= 0)
 		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 0f0aa96fb051..8fea9f88d118 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -272,7 +272,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 
-	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ);
+	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!fence->ops->signaled)
 			dma_fence_enable_sw_signaling(fence);
@@ -301,7 +301,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 	struct dma_resv *resv = &bo->base._resv;
 	int ret;
 
-	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ))
+	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
 		ret = 0;
 	else
 		ret = -EBUSY;
@@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			dma_resv_unlock(bo->base.resv);
 		spin_unlock(&bo->bdev->lru_lock);
 
-		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
+		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
 					     interruptible,
 					     30 * HZ);
 
@@ -418,7 +418,7 @@ static void ttm_bo_release(struct kref *kref)
 			 * fences block for the BO to become idle
 			 */
 			dma_resv_wait_timeout(bo->base.resv,
-					      DMA_RESV_USAGE_READ, false,
+					      DMA_RESV_USAGE_BOOKKEEP, false,
 					      30 * HZ);
 		}
 
@@ -429,7 +429,7 @@ static void ttm_bo_release(struct kref *kref)
 		ttm_mem_io_free(bdev, bo->resource);
 	}
 
-	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) ||
+	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
 	    !dma_resv_trylock(bo->base.resv)) {
 		/* The BO is not idle, resurrect it for delayed destroy */
 		ttm_bo_flush_all_fences(bo);
@@ -1074,13 +1074,13 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 	long timeout = 15 * HZ;
 
 	if (no_wait) {
-		if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ))
+		if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP))
 			return 0;
 		else
 			return -EBUSY;
 	}
 
-	timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
+	timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
 					interruptible, timeout);
 	if (timeout < 0)
 		return timeout;
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 8f4f406a5d02..f7b8ed0e40ad 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -55,7 +55,7 @@ struct dma_resv_list;
  * This enum describes the different use cases for a dma_resv object and
  * controls which fences are returned when queried.
  *
- * An important fact is that there is the order KERNEL<WRITE<READ and
+ * An important fact is that there is the order KERNEL<WRITE<READ<BOOKKEEP and
  * when the dma_resv object is asked for fences for one use case the fences
  * for the lower use case are returned as well.
  *
@@ -93,6 +93,17 @@ enum dma_resv_usage {
 	 * an implicit read dependency.
 	 */
 	DMA_RESV_USAGE_READ,
+
+	/**
+	 * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync.
+	 *
+	 * This should be used by submissions which don't want to participate in
+	 * implicit synchronization.
+	 *
+	 * The most common cases are preemption fences as well as page table
+	 * updates.
+	 */
+	DMA_RESV_USAGE_BOOKKEEP
 };
 
 /**
-- 
2.25.1



* [PATCH 21/23] dma-buf: wait for map to complete for static attachments
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (18 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 20/23] dma-buf: add DMA_RESV_USAGE_BOOKKEEP v2 Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 15:56   ` Daniel Vetter
  2022-03-21 13:58   ` [Intel-gfx] " Christian König
                   ` (4 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

We previously did this in the individual drivers, but it is more
defensive to move it into the common code.

Dynamic attachments are expected to wait for map operations to complete
themselves.
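
A short sketch of the rule this enforces, with a made-up helper name; static importers now get the wait inside dma_buf_map_attachment(), dynamic importers still handle the kernel fences themselves:

#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
#include <linux/sched.h>

static long example_wait_before_access(struct dma_buf_attachment *attach)
{
	/* static attachments already waited inside dma_buf_map_attachment() */
	if (!dma_buf_attachment_is_dynamic(attach))
		return 0;

	return dma_resv_wait_timeout(attach->dmabuf->resv,
				     DMA_RESV_USAGE_KERNEL, true,
				     MAX_SCHEDULE_TIMEOUT);
}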

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c                   | 18 +++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 14 +-------------
 drivers/gpu/drm/nouveau/nouveau_prime.c     | 17 +----------------
 drivers/gpu/drm/radeon/radeon_prime.c       | 16 +++-------------
 4 files changed, 20 insertions(+), 45 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 528983d3ba64..d3dd602c4753 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -660,12 +660,24 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach,
 				       enum dma_data_direction direction)
 {
 	struct sg_table *sg_table;
+	signed long ret;
 
 	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+	if (IS_ERR_OR_NULL(sg_table))
+		return sg_table;
+
+	if (!dma_buf_attachment_is_dynamic(attach)) {
+		ret = dma_resv_wait_timeout(attach->dmabuf->resv,
+					    DMA_RESV_USAGE_KERNEL, true,
+					    MAX_SCHEDULE_TIMEOUT);
+		if (ret < 0) {
+			attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
+							   direction);
+			return ERR_PTR(ret);
+		}
+	}
 
-	if (!IS_ERR_OR_NULL(sg_table))
-		mangle_sg_table(sg_table);
-
+	mangle_sg_table(sg_table);
 	return sg_table;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 579adfafe4d0..782cbca37538 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
 {
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	int r;
 
 	/* pin buffer into GTT */
-	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
-	if (r)
-		return r;
-
-	if (bo->tbo.moving) {
-		r = dma_fence_wait(bo->tbo.moving, true);
-		if (r) {
-			amdgpu_bo_unpin(bo);
-			return r;
-		}
-	}
-	return 0;
+	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
 }
 
 /**
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index 60019d0532fc..347488685f74 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -93,22 +93,7 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj)
 	if (ret)
 		return -EINVAL;
 
-	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
-	if (ret)
-		goto error;
-
-	if (nvbo->bo.moving)
-		ret = dma_fence_wait(nvbo->bo.moving, true);
-
-	ttm_bo_unreserve(&nvbo->bo);
-	if (ret)
-		goto error;
-
-	return ret;
-
-error:
-	nouveau_bo_unpin(nvbo);
-	return ret;
+	return 0;
 }
 
 void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index 4a90807351e7..42a87948e28c 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -77,19 +77,9 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
 
 	/* pin buffer into GTT */
 	ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
-	if (unlikely(ret))
-		goto error;
-
-	if (bo->tbo.moving) {
-		ret = dma_fence_wait(bo->tbo.moving, false);
-		if (unlikely(ret)) {
-			radeon_bo_unpin(bo);
-			goto error;
-		}
-	}
-
-	bo->prime_shared_count++;
-error:
+	if (likely(ret == 0))
+		bo->prime_shared_count++;
+
 	radeon_bo_unreserve(bo);
 	return ret;
 }
-- 
2.25.1



* [PATCH 22/23] drm/i915: drop bo->moving dependency
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
@ 2022-03-21 13:58   ` Christian König
  2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
                     ` (23 subsequent siblings)
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: intel-gfx, Christian König

That should now be handled by the common dma_resv framework.
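
The replacement pattern, shown as a sketch with made-up helper names; instead of bo->moving the kernel fences in the reservation object are queried or waited for:

#include <linux/dma-resv.h>
#include <linux/sched.h>

/* what used to be dma_fence_get(bo->moving) */
static int example_get_moving_fence(struct dma_resv *resv,
				    struct dma_fence **fence)
{
	return dma_resv_get_singleton(resv, DMA_RESV_USAGE_KERNEL, fence);
}

/* what used to be dma_fence_wait(bo->moving, intr) */
static int example_wait_moving_fence(struct dma_resv *resv, bool intr)
{
	long ret;

	ret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_KERNEL, intr,
				    MAX_SCHEDULE_TIMEOUT);
	return ret < 0 ? ret : 0;
}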

Signed-off-by: Christian König <christian.koenig@amd.com>
Cc: intel-gfx@lists.freedesktop.org
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c   | 29 ++++++--------------
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  5 ++--
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +---------
 drivers/gpu/drm/i915/i915_vma.c              |  9 +++++-
 4 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index d87b508b59b1..fd240435ffef 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -742,18 +742,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
 /**
  * i915_gem_object_get_moving_fence - Get the object's moving fence if any
  * @obj: The object whose moving fence to get.
+ * @fence: The resulting fence
  *
  * A non-signaled moving fence means that there is an async operation
  * pending on the object that needs to be waited on before setting up
  * any GPU- or CPU PTEs to the object's pages.
  *
- * Return: A refcounted pointer to the object's moving fence if any,
- * NULL otherwise.
+ * Return: Negative error code or 0 for success.
  */
-struct dma_fence *
-i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
+				     struct dma_fence **fence)
 {
-	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
+	return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
+				      fence);
 }
 
 /**
@@ -771,23 +772,9 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
 int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
 				      bool intr)
 {
-	struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
-	int ret;
-
 	assert_object_held(obj);
-	if (!fence)
-		return 0;
-
-	ret = dma_fence_wait(fence, intr);
-	if (ret)
-		return ret;
-
-	if (fence->error)
-		return fence->error;
-
-	i915_gem_to_ttm(obj)->moving = NULL;
-	dma_fence_put(fence);
-	return 0;
+	return dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_KERNEL,
+				     intr, MAX_SCHEDULE_TIMEOUT);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index f66d46882ea7..be57af8bfb31 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -521,9 +521,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 	i915_gem_object_unpin_pages(obj);
 }
 
-struct dma_fence *
-i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
+				     struct dma_fence **fence);
 int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
 				      bool intr);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index e4a232e22f9d..4d5d0cd64f23 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -452,19 +452,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
 	return fence;
 }
 
-static int
-prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
-	  struct i915_deps *deps)
-{
-	int ret;
-
-	ret = i915_deps_add_dependency(deps, bo->moving, ctx);
-	if (!ret)
-		ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
-
-	return ret;
-}
-
 /**
  * i915_ttm_move - The TTM move callback used by i915.
  * @bo: The buffer object.
@@ -519,7 +506,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 		struct i915_deps deps;
 
 		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
-		ret = prev_deps(bo, ctx, &deps);
+		ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
 		if (ret) {
 			i915_refct_sgt_put(dst_rsgt);
 			return ret;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 52fd6705a518..8737159f4706 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1247,10 +1247,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	if (err)
 		return err;
 
+	if (vma->obj) {
+		err = i915_gem_object_get_moving_fence(vma->obj, &moving);
+		if (err)
+			return err;
+	} else {
+		moving = NULL;
+	}
+
 	if (flags & PIN_GLOBAL)
 		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
 
-	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
 	if (flags & vma->vm->bind_async_flags || moving) {
 		/* lock VM */
 		err = i915_vm_lock_objects(vma->vm, ww);
-- 
2.25.1




* [PATCH 23/23] drm/ttm: remove bo->moving
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (20 preceding siblings ...)
  2022-03-21 13:58   ` [Intel-gfx] " Christian König
@ 2022-03-21 13:58 ` Christian König
  2022-03-29 16:02   ` Daniel Vetter
  2022-03-21 14:03 ` [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (2 subsequent siblings)
  24 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 13:58 UTC (permalink / raw)
  To: daniel.vetter, dri-devel; +Cc: Christian König

This is now handled by the DMA-buf framework in the dma_resv obj.
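
As a sketch of the consumer side, with a made-up helper name: a fault-handler style quick check against the kernel fences replaces testing bo->moving, with a blocking wait as fallback:

#include <linux/dma-resv.h>
#include <linux/errno.h>
#include <linux/sched.h>

static long example_sync_to_moves(struct dma_resv *resv, bool no_wait)
{
	/* quick non-blocking check, replaces testing bo->moving */
	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_KERNEL))
		return 0;

	if (no_wait)
		return -EBUSY;

	return dma_resv_wait_timeout(resv, DMA_RESV_USAGE_KERNEL, true,
				     MAX_SCHEDULE_TIMEOUT);
}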

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 13 ++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  7 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c    | 11 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c   | 11 ++++--
 drivers/gpu/drm/ttm/ttm_bo.c                  | 10 ++----
 drivers/gpu/drm/ttm/ttm_bo_util.c             |  7 ----
 drivers/gpu/drm/ttm/ttm_bo_vm.c               | 34 +++++++------------
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c      |  6 ----
 include/drm/ttm/ttm_bo_api.h                  |  2 --
 9 files changed, 40 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b461c3aab877..fe168b3cc3f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2406,6 +2406,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 		struct amdgpu_bo *bo = mem->bo;
 		uint32_t domain = mem->domain;
 		struct kfd_mem_attachment *attachment;
+		struct dma_resv_iter cursor;
+		struct dma_fence *fence;
 
 		total_size += amdgpu_bo_size(bo);
 
@@ -2420,10 +2422,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 				goto validate_map_fail;
 			}
 		}
-		ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
-		if (ret) {
-			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
-			goto validate_map_fail;
+		dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+					DMA_RESV_USAGE_KERNEL, fence) {
+			ret = amdgpu_sync_fence(&sync_obj, fence);
+			if (ret) {
+				pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
+				goto validate_map_fail;
+			}
 		}
 		list_for_each_entry(attachment, &mem->attachments, list) {
 			if (!attachment->is_mapped)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1618b6847c69..887fa3f4284e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -609,9 +609,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 		if (unlikely(r))
 			goto fail_unreserve;
 
-		amdgpu_bo_fence(bo, fence, false);
-		dma_fence_put(bo->tbo.moving);
-		bo->tbo.moving = dma_fence_get(fence);
+		dma_resv_add_fence(bo->tbo.base.resv, fence,
+				   DMA_RESV_USAGE_KERNEL);
 		dma_fence_put(fence);
 	}
 	if (!bp->resv)
@@ -1307,7 +1306,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
 
 	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
 	if (!WARN_ON(r)) {
-		amdgpu_bo_fence(abo, fence, false);
+		dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
 		dma_fence_put(fence);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index e3fbf0f10add..31913ae86de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
 {
 	unsigned int i;
 	uint64_t value;
-	int r;
+	long r;
 
-	if (vmbo->bo.tbo.moving) {
-		r = dma_fence_wait(vmbo->bo.tbo.moving, true);
-		if (r)
-			return r;
-	}
+	r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+				  true, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0)
+		return r;
 
 	pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index dbb551762805..bdb44cee19d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 	struct amdgpu_bo *bo = &vmbo->bo;
 	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
 		: AMDGPU_IB_POOL_DELAYED;
+	struct dma_resv_iter cursor;
 	unsigned int i, ndw, nptes;
+	struct dma_fence *fence;
 	uint64_t *pte;
 	int r;
 
 	/* Wait for PD/PT moves to be completed */
-	r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
-	if (r)
-		return r;
+	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+				DMA_RESV_USAGE_KERNEL, fence) {
+		r = amdgpu_sync_fence(&p->job->sync, fence);
+		if (r)
+			return r;
+	}
 
 	do {
 		ndw = p->num_dw_left;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8fea9f88d118..9bce692075da 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -468,7 +468,6 @@ static void ttm_bo_release(struct kref *kref)
 	dma_resv_unlock(bo->base.resv);
 
 	atomic_dec(&ttm_glob.bo_count);
-	dma_fence_put(bo->moving);
 	bo->destroy(bo);
 }
 
@@ -737,9 +736,8 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
 }
 
 /*
- * Add the last move fence to the BO and reserve a new shared slot. We only use
- * a shared slot to avoid unecessary sync and rely on the subsequent bo move to
- * either stall or use an exclusive fence respectively set bo->moving.
+ * Add the last move fence to the BO as kernel dependency and reserve a new
+ * fence slot.
  */
 static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 				 struct ttm_resource_manager *man,
@@ -769,9 +767,6 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
 		dma_fence_put(fence);
 		return ret;
 	}
-
-	dma_fence_put(bo->moving);
-	bo->moving = fence;
 	return 0;
 }
 
@@ -978,7 +973,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
 	bo->bdev = bdev;
 	bo->type = type;
 	bo->page_alignment = page_alignment;
-	bo->moving = NULL;
 	bo->pin_count = 0;
 	bo->sg = sg;
 	if (resv) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 98e1c804519e..a2e3a9626198 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -229,7 +229,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	atomic_inc(&ttm_glob.bo_count);
 	INIT_LIST_HEAD(&fbo->base.ddestroy);
 	INIT_LIST_HEAD(&fbo->base.lru);
-	fbo->base.moving = NULL;
 	drm_vma_node_reset(&fbo->base.base.vma_node);
 
 	kref_init(&fbo->base.kref);
@@ -501,9 +500,6 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
 	 * operation has completed.
 	 */
 
-	dma_fence_put(bo->moving);
-	bo->moving = dma_fence_get(fence);
-
 	ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 	if (ret)
 		return ret;
@@ -547,9 +543,6 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
 	spin_unlock(&from->move_lock);
 
 	ttm_resource_free(bo, &bo->resource);
-
-	dma_fence_put(bo->moving);
-	bo->moving = dma_fence_get(fence);
 }
 
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 08ba083a80d2..5b324f245265 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -46,17 +46,13 @@
 static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 				struct vm_fault *vmf)
 {
-	vm_fault_t ret = 0;
-	int err = 0;
-
-	if (likely(!bo->moving))
-		goto out_unlock;
+	long err = 0;
 
 	/*
 	 * Quick non-stalling check for idle.
 	 */
-	if (dma_fence_is_signaled(bo->moving))
-		goto out_clear;
+	if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL))
+		return 0;
 
 	/*
 	 * If possible, avoid waiting for GPU with mmap_lock
@@ -64,34 +60,30 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 	 * is the first attempt.
 	 */
 	if (fault_flag_allow_retry_first(vmf->flags)) {
-		ret = VM_FAULT_RETRY;
 		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
-			goto out_unlock;
+			return VM_FAULT_RETRY;
 
 		ttm_bo_get(bo);
 		mmap_read_unlock(vmf->vma->vm_mm);
-		(void) dma_fence_wait(bo->moving, true);
+		(void)dma_resv_wait_timeout(bo->base.resv,
+					    DMA_RESV_USAGE_KERNEL, true,
+					    MAX_SCHEDULE_TIMEOUT);
 		dma_resv_unlock(bo->base.resv);
 		ttm_bo_put(bo);
-		goto out_unlock;
+		return VM_FAULT_RETRY;
 	}
 
 	/*
 	 * Ordinary wait.
 	 */
-	err = dma_fence_wait(bo->moving, true);
-	if (unlikely(err != 0)) {
-		ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
+	err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true,
+				    MAX_SCHEDULE_TIMEOUT);
+	if (unlikely(err < 0)) {
+		return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
 			VM_FAULT_NOPAGE;
-		goto out_unlock;
 	}
 
-out_clear:
-	dma_fence_put(bo->moving);
-	bo->moving = NULL;
-
-out_unlock:
-	return ret;
+	return 0;
 }
 
 static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index f999fdd927df..c6d02c98a19a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -1163,12 +1163,6 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
 		*num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
 						      PAGE_SIZE);
 		vmw_bo_fence_single(bo, NULL);
-		if (bo->moving)
-			dma_fence_put(bo->moving);
-
-		return dma_resv_get_singleton(bo->base.resv,
-					      DMA_RESV_USAGE_KERNEL,
-					      &bo->moving);
 	}
 
 	return 0;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index c17b2df9178b..4c7134550262 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -97,7 +97,6 @@ struct ttm_tt;
  * @lru: List head for the lru list.
  * @ddestroy: List head for the delayed destroy list.
  * @swap: List head for swap LRU list.
- * @moving: Fence set when BO is moving
  * @offset: The current GPU offset, which can have different meanings
  * depending on the memory type. For SYSTEM type memory, it should be 0.
  * @cur_placement: Hint of current placement.
@@ -150,7 +149,6 @@ struct ttm_buffer_object {
 	 * Members protected by a bo reservation.
 	 */
 
-	struct dma_fence *moving;
 	unsigned priority;
 	unsigned pin_count;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 73+ messages in thread

* Re: [PATCH 07/23] drm/vmwgfx: stop using dma_resv_excl_fence
  2022-03-21 13:58 ` [PATCH 07/23] drm/vmwgfx: " Christian König
@ 2022-03-21 14:02   ` Zack Rusin
  2022-03-21 14:12     ` Christian König
  0 siblings, 1 reply; 73+ messages in thread
From: Zack Rusin @ 2022-03-21 14:02 UTC (permalink / raw)
  To: dri-devel, daniel.vetter, ckoenig.leichtzumerken
  Cc: Linux-graphics-maintainer, christian.koenig

On Mon, 2022-03-21 at 14:58 +0100, Christian König wrote:
> Instead use the new dma_resv_get_singleton function.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
> Cc: Zack Rusin <zackr@vmware.com>
> ---
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index 708899ba2102..36c3b5db7e69 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -1165,8 +1165,10 @@ int vmw_resources_clean(struct
> vmw_buffer_object *vbo, pgoff_t start,
>                 vmw_bo_fence_single(bo, NULL);
>                 if (bo->moving)
>                         dma_fence_put(bo->moving);
> -               bo->moving = dma_fence_get
> -                       (dma_resv_excl_fence(bo->base.resv));
> +
> +               /* TODO: This is actually a memory management
> dependency */
> +               return dma_resv_get_singleton(bo->base.resv, false,
> +                                             &bo->moving);
>         }
> 
>         return 0;
> --
> 2.25.1
> 

Sorry, I haven't had the time to go over the entire series, the patch
looks good, but what's the memory management dependency the todo
mentions?

z


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (21 preceding siblings ...)
  2022-03-21 13:58 ` [PATCH 23/23] drm/ttm: remove bo->moving Christian König
@ 2022-03-21 14:03 ` Christian König
  2022-03-23 13:09 ` Daniel Vetter
  2022-03-23 15:55 ` Felix Kuehling
  24 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-21 14:03 UTC (permalink / raw)
  To: Christian König, daniel.vetter, dri-devel

Mhm, crap, I forgot the cover letter. Anyway, this should be a well-known 
set by now.

I've polished the documentation a bit more compared to the last version, 
and this time I finally managed to correctly CC all the driver maintainers 
on the relevant patches.

Please review and comment.

Cheers,
Christian.

On 21.03.22 at 14:58, Christian König wrote:
> This function allows to replace fences from the shared fence list when
> we can gurantee that the operation represented by the original fence has
> finished or no accesses to the resources protected by the dma_resv
> object any more when the new fence finishes.
>
> Then use this function in the amdkfd code when BOs are unmapped from the
> process.
>
> v2: add an example when this is usefull.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/dma-buf/dma-resv.c                    | 45 +++++++++++++++++
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++----------------
>   include/linux/dma-resv.h                      |  2 +
>   3 files changed, 54 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index b51416405e86..509060861cf3 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -289,6 +289,51 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>   }
>   EXPORT_SYMBOL(dma_resv_add_shared_fence);
>   
> +/**
> + * dma_resv_replace_fences - replace fences in the dma_resv obj
> + * @obj: the reservation object
> + * @context: the context of the fences to replace
> + * @replacement: the new fence to use instead
> + *
> + * Replace fences with a specified context with a new fence. Only valid if the
> + * operation represented by the original fence has no longer access to the
> + * resources represented by the dma_resv object when the new fence completes.
> + *
> + * And example for using this is replacing a preemption fence with a page table
> + * update fence which makes the resource inaccessible.
> + */
> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> +			     struct dma_fence *replacement)
> +{
> +	struct dma_resv_list *list;
> +	struct dma_fence *old;
> +	unsigned int i;
> +
> +	dma_resv_assert_held(obj);
> +
> +	write_seqcount_begin(&obj->seq);
> +
> +	old = dma_resv_excl_fence(obj);
> +	if (old->context == context) {
> +		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
> +		dma_fence_put(old);
> +	}
> +
> +	list = dma_resv_shared_list(obj);
> +	for (i = 0; list && i < list->shared_count; ++i) {
> +		old = rcu_dereference_protected(list->shared[i],
> +						dma_resv_held(obj));
> +		if (old->context != context)
> +			continue;
> +
> +		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
> +		dma_fence_put(old);
> +	}
> +
> +	write_seqcount_end(&obj->seq);
> +}
> +EXPORT_SYMBOL(dma_resv_replace_fences);
> +
>   /**
>    * dma_resv_add_excl_fence - Add an exclusive fence.
>    * @obj: the reservation object
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f9bab963a948..b6f266f612ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>   static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>   					struct amdgpu_amdkfd_fence *ef)
>   {
> -	struct dma_resv *resv = bo->tbo.base.resv;
> -	struct dma_resv_list *old, *new;
> -	unsigned int i, j, k;
> +	struct dma_fence *replacement;
>   
>   	if (!ef)
>   		return -EINVAL;
>   
> -	old = dma_resv_shared_list(resv);
> -	if (!old)
> -		return 0;
> -
> -	new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
> -	if (!new)
> -		return -ENOMEM;
> -
> -	/* Go through all the shared fences in the resevation object and sort
> -	 * the interesting ones to the end of the list.
> +	/* TODO: Instead of block before we should use the fence of the page
> +	 * table update and TLB flush here directly.
>   	 */
> -	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
> -		struct dma_fence *f;
> -
> -		f = rcu_dereference_protected(old->shared[i],
> -					      dma_resv_held(resv));
> -
> -		if (f->context == ef->base.context)
> -			RCU_INIT_POINTER(new->shared[--j], f);
> -		else
> -			RCU_INIT_POINTER(new->shared[k++], f);
> -	}
> -	new->shared_max = old->shared_max;
> -	new->shared_count = k;
> -
> -	/* Install the new fence list, seqcount provides the barriers */
> -	write_seqcount_begin(&resv->seq);
> -	RCU_INIT_POINTER(resv->fence, new);
> -	write_seqcount_end(&resv->seq);
> -
> -	/* Drop the references to the removed fences or move them to ef_list */
> -	for (i = j; i < old->shared_count; ++i) {
> -		struct dma_fence *f;
> -
> -		f = rcu_dereference_protected(new->shared[i],
> -					      dma_resv_held(resv));
> -		dma_fence_put(f);
> -	}
> -	kfree_rcu(old, rcu);
> -
> +	replacement = dma_fence_get_stub();
> +	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
> +				replacement);
> +	dma_fence_put(replacement);
>   	return 0;
>   }
>   
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index afdfdfac729f..3f53177bdb46 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -468,6 +468,8 @@ void dma_resv_init(struct dma_resv *obj);
>   void dma_resv_fini(struct dma_resv *obj);
>   int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
>   void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> +			     struct dma_fence *fence);
>   void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
>   int dma_resv_get_fences(struct dma_resv *obj, bool write,
>   			unsigned int *num_fences, struct dma_fence ***fences);


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 07/23] drm/vmwgfx: stop using dma_resv_excl_fence
  2022-03-21 14:02   ` Zack Rusin
@ 2022-03-21 14:12     ` Christian König
  2022-03-21 15:11       ` Zack Rusin
  0 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-21 14:12 UTC (permalink / raw)
  To: Zack Rusin, dri-devel, daniel.vetter, ckoenig.leichtzumerken
  Cc: Linux-graphics-maintainer

On 21.03.22 at 15:02, Zack Rusin wrote:
> On Mon, 2022-03-21 at 14:58 +0100, Christian König wrote:
>> Instead use the new dma_resv_get_singleton function.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>> Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
>> Cc: Zack Rusin <zackr@vmware.com>
>> ---
>>   drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++--
>>   1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> index 708899ba2102..36c3b5db7e69 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> @@ -1165,8 +1165,10 @@ int vmw_resources_clean(struct
>> vmw_buffer_object *vbo, pgoff_t start,
>>                  vmw_bo_fence_single(bo, NULL);
>>                  if (bo->moving)
>>                          dma_fence_put(bo->moving);
>> -               bo->moving = dma_fence_get
>> -                       (dma_resv_excl_fence(bo->base.resv));
>> +
>> +               /* TODO: This is actually a memory management
>> dependency */
>> +               return dma_resv_get_singleton(bo->base.resv, false,
>> +                                             &bo->moving);
>>          }
>>
>>          return 0;
>> --
>> 2.25.1
>>
> Sorry, I haven't had the time to go over the entire series, the patch
> looks good, but what's the memory management dependency the todo
> mentions?

Previously the function installed only the exclusive fence as the moving 
fence of the BO. Now it grabs all fences and installs them as the moving 
fence of the BO.

But what we really need is to track whether a fence in the reservation 
object is a kernel memory management dependency or not.

Patch #19 adds that, and patch #23 then finally gets rid of the whole 
bo->moving handling here because it becomes completely unnecessary.
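
Just to sketch where this ends up (this is not code from the series as 
posted, and the helper name here is made up): with such tracking, TTM can 
drop bo->moving entirely and simply wait for the kernel fences on the 
reservation object:

static int ttm_bo_wait_kernel_fences(struct ttm_buffer_object *bo)
{
	long ret;

	/* Wait only for fences marked as kernel/memory management
	 * dependencies, interruptible, with no practical timeout.
	 */
	ret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL,
				    true, MAX_SCHEDULE_TIMEOUT);
	return ret < 0 ? ret : 0;
}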

I can drop the comment if you want, or just note that it is only 
temporary until the follow-up patches are merged.

Regards,
Christian.


>
> z
>


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 07/23] drm/vmwgfx: stop using dma_resv_excl_fence
  2022-03-21 14:12     ` Christian König
@ 2022-03-21 15:11       ` Zack Rusin
  2022-03-22  7:13         ` Christian König
  0 siblings, 1 reply; 73+ messages in thread
From: Zack Rusin @ 2022-03-21 15:11 UTC (permalink / raw)
  To: dri-devel, christian.koenig, daniel.vetter, ckoenig.leichtzumerken
  Cc: Linux-graphics-maintainer

On Mon, 2022-03-21 at 15:12 +0100, Christian König wrote:
> On 21.03.22 at 15:02, Zack Rusin wrote:
> > On Mon, 2022-03-21 at 14:58 +0100, Christian König wrote:
> > > ⚠ External Email: This email originated from outside of the
> > > organization. Do not click links or open attachments unless you
> > > recognize the sender.
> > > 
> > > Instead use the new dma_resv_get_singleton function.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
> > > Cc: Zack Rusin <zackr@vmware.com>
> > > ---
> > >   drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++--
> > >   1 file changed, 4 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > index 708899ba2102..36c3b5db7e69 100644
> > > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > @@ -1165,8 +1165,10 @@ int vmw_resources_clean(struct
> > > vmw_buffer_object *vbo, pgoff_t start,
> > >                  vmw_bo_fence_single(bo, NULL);
> > >                  if (bo->moving)
> > >                          dma_fence_put(bo->moving);
> > > -               bo->moving = dma_fence_get
> > > -                       (dma_resv_excl_fence(bo->base.resv));
> > > +
> > > +               /* TODO: This is actually a memory management
> > > dependency */
> > > +               return dma_resv_get_singleton(bo->base.resv,
> > > false,
> > > +                                             &bo->moving);
> > >          }
> > > 
> > >          return 0;
> > > --
> > > 2.25.1
> > > 
> > Sorry, I haven't had the time to go over the entire series, the
> > patch
> > looks good, but what's the memory management dependency the todo
> > mentions?
> 
> Previously the function installed only the exclusive fence as moving
> fence into the BO.
> Now it grabs all fences and installs them as moving fence into the
> BO.
> 
> But what we really need is tracking if a fence in the reservation
> object
> is a kernel memory management dependency or not.
> 
> Patch #19 adds that and patch #23 then finally gets rid of the whole
> bo->moving handling here because it becomes completely unnecessary.
> 
> I can drop the comment if you want or just note that it is only
> temporary until the follow up patches are merged.

Ah, yes, if you could remove it that'd be great. The patch will never
be backported anywhere without the rest of the series, so it shouldn't
be a problem. 

z

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 07/23] drm/vmwgfx: stop using dma_resv_excl_fence
  2022-03-21 15:11       ` Zack Rusin
@ 2022-03-22  7:13         ` Christian König
  2022-03-22 12:40           ` Zack Rusin
  0 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-22  7:13 UTC (permalink / raw)
  To: Zack Rusin, dri-devel, daniel.vetter, ckoenig.leichtzumerken
  Cc: Linux-graphics-maintainer

On 21.03.22 at 16:11, Zack Rusin wrote:
> On Mon, 2022-03-21 at 15:12 +0100, Christian König wrote:
>> On 21.03.22 at 15:02, Zack Rusin wrote:
>>> On Mon, 2022-03-21 at 14:58 +0100, Christian König wrote:
>>>> Instead use the new dma_resv_get_singleton function.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>> Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
>>>> Cc: Zack Rusin <zackr@vmware.com>
>>>> ---
>>>>    drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++--
>>>>    1 file changed, 4 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>>>> b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>>>> index 708899ba2102..36c3b5db7e69 100644
>>>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>>>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>>>> @@ -1165,8 +1165,10 @@ int vmw_resources_clean(struct
>>>> vmw_buffer_object *vbo, pgoff_t start,
>>>>                   vmw_bo_fence_single(bo, NULL);
>>>>                   if (bo->moving)
>>>>                           dma_fence_put(bo->moving);
>>>> -               bo->moving = dma_fence_get
>>>> -                       (dma_resv_excl_fence(bo->base.resv));
>>>> +
>>>> +               /* TODO: This is actually a memory management
>>>> dependency */
>>>> +               return dma_resv_get_singleton(bo->base.resv,
>>>> false,
>>>> +                                             &bo->moving);
>>>>           }
>>>>
>>>>           return 0;
>>>> --
>>>> 2.25.1
>>>>
>>> Sorry, I haven't had the time to go over the entire series, the
>>> patch
>>> looks good, but what's the memory management dependency the todo
>>> mentions?
>> Previously the function installed only the exclusive fence as moving
>> fence into the BO.
>> Now it grabs all fences and installs them as moving fence into the
>> BO.
>>
>> But what we really need is tracking if a fence in the reservation
>> object
>> is a kernel memory management dependency or not.
>>
>> Patch #19 adds that and patch #23 then finally gets rid of the whole
>> bo->moving handling here because it becomes completely unnecessary.
>>
>> I can drop the comment if you want or just note that it is only
>> temporary until the follow up patches are merged.
> Ah, yes, if you could remove it that'd be great. The patch will never
> be backported anywhere without the rest of the series, so it shouldn't
> be a problem.

Sure, can I then have your rb for this one?

Thanks,
Christian.

>
> z


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 07/23] drm/vmwgfx: stop using dma_resv_excl_fence
  2022-03-22  7:13         ` Christian König
@ 2022-03-22 12:40           ` Zack Rusin
  0 siblings, 0 replies; 73+ messages in thread
From: Zack Rusin @ 2022-03-22 12:40 UTC (permalink / raw)
  To: dri-devel, christian.koenig, daniel.vetter, ckoenig.leichtzumerken
  Cc: Linux-graphics-maintainer

On Tue, 2022-03-22 at 08:13 +0100, Christian König wrote:
> On 21.03.22 at 16:11, Zack Rusin wrote:
> > On Mon, 2022-03-21 at 15:12 +0100, Christian König wrote:
> > > On 21.03.22 at 15:02, Zack Rusin wrote:
> > > > On Mon, 2022-03-21 at 14:58 +0100, Christian König wrote:
> > > > > Instead use the new dma_resv_get_singleton function.
> > > > > 
> > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > > > Cc: VMware Graphics <linux-graphics-maintainer@vmware.com>
> > > > > Cc: Zack Rusin <zackr@vmware.com>
> > > > > ---
> > > > >    drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++++--
> > > > >    1 file changed, 4 insertions(+), 2 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > > > b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > > > index 708899ba2102..36c3b5db7e69 100644
> > > > > --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > > > +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> > > > > @@ -1165,8 +1165,10 @@ int vmw_resources_clean(struct
> > > > > vmw_buffer_object *vbo, pgoff_t start,
> > > > >                   vmw_bo_fence_single(bo, NULL);
> > > > >                   if (bo->moving)
> > > > >                           dma_fence_put(bo->moving);
> > > > > -               bo->moving = dma_fence_get
> > > > > -                       (dma_resv_excl_fence(bo->base.resv));
> > > > > +
> > > > > +               /* TODO: This is actually a memory management
> > > > > dependency */
> > > > > +               return dma_resv_get_singleton(bo->base.resv,
> > > > > false,
> > > > > +                                             &bo->moving);
> > > > >           }
> > > > > 
> > > > >           return 0;
> > > > > --
> > > > > 2.25.1
> > > > > 
> > > > Sorry, I haven't had the time to go over the entire series, the
> > > > patch
> > > > looks good, but what's the memory management dependency the
> > > > todo
> > > > mentions?
> > > Previously the function installed only the exclusive fence as
> > > moving
> > > fence into the BO.
> > > Now it grabs all fences and installs them as moving fence into
> > > the
> > > BO.
> > > 
> > > But what we really need is tracking if a fence in the reservation
> > > object
> > > is a kernel memory management dependency or not.
> > > 
> > > Patch #19 adds that and patch #23 then finally gets rid of the
> > > whole
> > > bo->moving handling here because it becomes completely
> > > unnecessary.
> > > 
> > > I can drop the comment if you want or just note that it is only
> > > temporary until the follow up patches are merged.
> > Ah, yes, if you could remove it that'd be great. The patch will
> > never
> > be backported anywhere without the rest of the series, so it
> > shouldn't
> > be a problem.
> 
> Sure, can I then have your rb for this one?

Yes, of course. Thanks!
Reviewed-by: Zack Rusin <zackr@vmware.com>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (22 preceding siblings ...)
  2022-03-21 14:03 ` [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
@ 2022-03-23 13:09 ` Daniel Vetter
  2022-03-23 15:55 ` Felix Kuehling
  24 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-23 13:09 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:34PM +0100, Christian König wrote:
> This function allows to replace fences from the shared fence list when
> we can gurantee that the operation represented by the original fence has
> finished or no accesses to the resources protected by the dma_resv
> object any more when the new fence finishes.
> 
> Then use this function in the amdkfd code when BOs are unmapped from the
> process.
> 
> v2: add an example when this is usefull.

Yeah this makes a looooot more sense now :-)

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> 
> Signed-off-by: Christian König <christian.koenig@amd.com>


> ---
>  drivers/dma-buf/dma-resv.c                    | 45 +++++++++++++++++
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++----------------
>  include/linux/dma-resv.h                      |  2 +
>  3 files changed, 54 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index b51416405e86..509060861cf3 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -289,6 +289,51 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>  }
>  EXPORT_SYMBOL(dma_resv_add_shared_fence);
>  
> +/**
> + * dma_resv_replace_fences - replace fences in the dma_resv obj
> + * @obj: the reservation object
> + * @context: the context of the fences to replace
> + * @replacement: the new fence to use instead
> + *
> + * Replace fences with a specified context with a new fence. Only valid if the
> + * operation represented by the original fence has no longer access to the
> + * resources represented by the dma_resv object when the new fence completes.
> + *
> + * And example for using this is replacing a preemption fence with a page table
> + * update fence which makes the resource inaccessible.
> + */
> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> +			     struct dma_fence *replacement)
> +{
> +	struct dma_resv_list *list;
> +	struct dma_fence *old;
> +	unsigned int i;
> +
> +	dma_resv_assert_held(obj);
> +
> +	write_seqcount_begin(&obj->seq);
> +
> +	old = dma_resv_excl_fence(obj);
> +	if (old->context == context) {
> +		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
> +		dma_fence_put(old);
> +	}
> +
> +	list = dma_resv_shared_list(obj);
> +	for (i = 0; list && i < list->shared_count; ++i) {
> +		old = rcu_dereference_protected(list->shared[i],
> +						dma_resv_held(obj));
> +		if (old->context != context)
> +			continue;
> +
> +		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
> +		dma_fence_put(old);
> +	}
> +
> +	write_seqcount_end(&obj->seq);
> +}
> +EXPORT_SYMBOL(dma_resv_replace_fences);
> +
>  /**
>   * dma_resv_add_excl_fence - Add an exclusive fence.
>   * @obj: the reservation object
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f9bab963a948..b6f266f612ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>  static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>  					struct amdgpu_amdkfd_fence *ef)
>  {
> -	struct dma_resv *resv = bo->tbo.base.resv;
> -	struct dma_resv_list *old, *new;
> -	unsigned int i, j, k;
> +	struct dma_fence *replacement;
>  
>  	if (!ef)
>  		return -EINVAL;
>  
> -	old = dma_resv_shared_list(resv);
> -	if (!old)
> -		return 0;
> -
> -	new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
> -	if (!new)
> -		return -ENOMEM;
> -
> -	/* Go through all the shared fences in the resevation object and sort
> -	 * the interesting ones to the end of the list.
> +	/* TODO: Instead of block before we should use the fence of the page
> +	 * table update and TLB flush here directly.
>  	 */
> -	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
> -		struct dma_fence *f;
> -
> -		f = rcu_dereference_protected(old->shared[i],
> -					      dma_resv_held(resv));
> -
> -		if (f->context == ef->base.context)
> -			RCU_INIT_POINTER(new->shared[--j], f);
> -		else
> -			RCU_INIT_POINTER(new->shared[k++], f);
> -	}
> -	new->shared_max = old->shared_max;
> -	new->shared_count = k;
> -
> -	/* Install the new fence list, seqcount provides the barriers */
> -	write_seqcount_begin(&resv->seq);
> -	RCU_INIT_POINTER(resv->fence, new);
> -	write_seqcount_end(&resv->seq);
> -
> -	/* Drop the references to the removed fences or move them to ef_list */
> -	for (i = j; i < old->shared_count; ++i) {
> -		struct dma_fence *f;
> -
> -		f = rcu_dereference_protected(new->shared[i],
> -					      dma_resv_held(resv));
> -		dma_fence_put(f);
> -	}
> -	kfree_rcu(old, rcu);
> -
> +	replacement = dma_fence_get_stub();
> +	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
> +				replacement);
> +	dma_fence_put(replacement);
>  	return 0;
>  }
>  
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index afdfdfac729f..3f53177bdb46 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -468,6 +468,8 @@ void dma_resv_init(struct dma_resv *obj);
>  void dma_resv_fini(struct dma_resv *obj);
>  int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
>  void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> +			     struct dma_fence *fence);
>  void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
>  int dma_resv_get_fences(struct dma_resv *obj, bool write,
>  			unsigned int *num_fences, struct dma_fence ***fences);
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 04/23] RDMA: use dma_resv_wait() instead of extracting the fence
  2022-03-21 13:58   ` Christian König
@ 2022-03-23 13:22     ` Daniel Vetter
  -1 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-23 13:22 UTC (permalink / raw)
  To: Christian König
  Cc: daniel.vetter, dri-devel, Christian König, Jason Gunthorpe,
	Leon Romanovsky, Maor Gottlieb, Gal Pressman, linux-media,
	linaro-mm-sig

On Mon, Mar 21, 2022 at 02:58:37PM +0100, Christian König wrote:
> Use dma_resv_wait() instead of extracting the exclusive fence and
> waiting on it manually.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Jason Gunthorpe <jgg@ziepe.ca>

Jason, can you ack this for merging through drm trees please?

Thanks, Daniel

> Cc: Leon Romanovsky <leon@kernel.org>
> Cc: Maor Gottlieb <maorg@nvidia.com>
> Cc: Gal Pressman <galpress@amazon.com>
> Cc: linux-media@vger.kernel.org
> Cc: linaro-mm-sig@lists.linaro.org
> ---
>  drivers/infiniband/core/umem_dmabuf.c | 8 ++------
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
> index f0760741f281..d32cd7538835 100644
> --- a/drivers/infiniband/core/umem_dmabuf.c
> +++ b/drivers/infiniband/core/umem_dmabuf.c
> @@ -16,7 +16,6 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
>  {
>  	struct sg_table *sgt;
>  	struct scatterlist *sg;
> -	struct dma_fence *fence;
>  	unsigned long start, end, cur = 0;
>  	unsigned int nmap = 0;
>  	int i;
> @@ -68,11 +67,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
>  	 * may be not up-to-date. Wait for the exporter to finish
>  	 * the migration.
>  	 */
> -	fence = dma_resv_excl_fence(umem_dmabuf->attach->dmabuf->resv);
> -	if (fence)
> -		return dma_fence_wait(fence, false);
> -
> -	return 0;
> +	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false,
> +				     false, MAX_SCHEDULE_TIMEOUT);
>  }
>  EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
>  
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2
  2022-03-21 13:58 ` [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2 Christian König
@ 2022-03-23 13:36   ` Daniel Vetter
  2022-04-05  7:58     ` Christian König
  0 siblings, 1 reply; 73+ messages in thread
From: Daniel Vetter @ 2022-03-23 13:36 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:43PM +0100, Christian König wrote:
> Drivers should never touch this directly.
> 
> v2: fix rebase clash
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

I guess as soon as we have the rdma ack you can land up to this patch?
-Daniel

> ---
>  drivers/dma-buf/dma-resv.c |  6 ++++++
>  include/linux/dma-resv.h   | 17 -----------------
>  2 files changed, 6 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index c09fd8da0c85..1c9af97fe904 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -140,6 +140,12 @@ void dma_resv_fini(struct dma_resv *obj)
>  }
>  EXPORT_SYMBOL(dma_resv_fini);
>  
> +static inline struct dma_fence *
> +dma_resv_excl_fence(struct dma_resv *obj)
> +{
> +       return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
> +}
> +
>  static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
>  {
>  	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 08512c1e215d..20e13f36710a 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -423,23 +423,6 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
>  	ww_mutex_unlock(&obj->lock);
>  }
>  
> -/**
> - * dma_resv_excl_fence - return the object's exclusive fence
> - * @obj: the reservation object
> - *
> - * Returns the exclusive fence (if any). Caller must either hold the objects
> - * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(),
> - * or one of the variants of each
> - *
> - * RETURNS
> - * The exclusive fence or NULL
> - */
> -static inline struct dma_fence *
> -dma_resv_excl_fence(struct dma_resv *obj)
> -{
> -	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
> -}
> -
>  void dma_resv_init(struct dma_resv *obj);
>  void dma_resv_fini(struct dma_resv *obj);
>  int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 11/23] dma-buf: drop the DAG approach for the dma_resv object v2
  2022-03-21 13:58 ` [PATCH 11/23] dma-buf: drop the DAG approach for the dma_resv object v2 Christian König
@ 2022-03-23 13:40   ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-23 13:40 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:44PM +0100, Christian König wrote:
> So far we had the approach of using a directed acyclic
> graph with the dma_resv obj.
> 
> This turned out to have many downsides, especially it means
> that every single driver and user of this interface needs
> to be aware of this restriction when adding fences. If the
> rules for the DAG are not followed then we end up with
> potential hard to debug memory corruption, information
> leaks or even elephant big security holes because we allow
> userspace to access freed up memory.
> 
> Since we already took a step back from that by always
> looking at all fences we now go a step further and stop
> dropping the shared fences when a new exclusive one is
> added.
> 
> v2: Drop some now superflous documentation
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-resv.c | 16 +---------------
>  include/linux/dma-buf.h    |  7 -------
>  include/linux/dma-resv.h   | 22 +++++-----------------
>  3 files changed, 6 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 1c9af97fe904..4b12141579e2 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -358,35 +358,21 @@ EXPORT_SYMBOL(dma_resv_replace_fences);
>   * @fence: the exclusive fence to add
>   *
>   * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock().
> - * Note that this function replaces all fences attached to @obj, see also
> - * &dma_resv.fence_excl for a discussion of the semantics.
> + * See also &dma_resv.fence_excl for a discussion of the semantics.
>   */
>  void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
>  {
>  	struct dma_fence *old_fence = dma_resv_excl_fence(obj);
> -	struct dma_resv_list *old;
> -	u32 i = 0;
>  
>  	dma_resv_assert_held(obj);
>  
> -	old = dma_resv_shared_list(obj);
> -	if (old)
> -		i = old->shared_count;
> -
>  	dma_fence_get(fence);
>  
>  	write_seqcount_begin(&obj->seq);
>  	/* write_seqcount_begin provides the necessary memory barrier */
>  	RCU_INIT_POINTER(obj->fence_excl, fence);
> -	if (old)
> -		old->shared_count = 0;
>  	write_seqcount_end(&obj->seq);
>  
> -	/* inplace update, no shared fences */
> -	while (i--)
> -		dma_fence_put(rcu_dereference_protected(old->shared[i],
> -						dma_resv_held(obj)));
> -
>  	dma_fence_put(old_fence);
>  }
>  EXPORT_SYMBOL(dma_resv_add_excl_fence);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 7ab50076e7a6..74083e62e19d 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -420,13 +420,6 @@ struct dma_buf {
>  	 * - Dynamic importers should set fences for any access that they can't
>  	 *   disable immediately from their &dma_buf_attach_ops.move_notify
>  	 *   callback.
> -	 *
> -	 * IMPORTANT:
> -	 *
> -	 * All drivers must obey the struct dma_resv rules, specifically the
> -	 * rules for updating fences, see &dma_resv.fence_excl and
> -	 * &dma_resv.fence. If these dependency rules are broken access tracking
> -	 * can be lost resulting in use after free issues.

Uh that's a bit much. I do think we should keep this, and update it to
point at whatever new dma_resv fence slot rules you're adding. Maybe just
keep the first part like:

	 * All drivers must obey the struct dma_resv rules, specifically the
	 * rules for updating and obeying fences.

With that

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

>  	 */
>  	struct dma_resv *resv;
>  
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 20e13f36710a..ecb697d4d861 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -93,23 +93,11 @@ struct dma_resv {
>  	 *
>  	 * The exclusive fence, if there is one currently.
>  	 *
> -	 * There are two ways to update this fence:
> -	 *
> -	 * - First by calling dma_resv_add_excl_fence(), which replaces all
> -	 *   fences attached to the reservation object. To guarantee that no
> -	 *   fences are lost, this new fence must signal only after all previous
> -	 *   fences, both shared and exclusive, have signalled. In some cases it
> -	 *   is convenient to achieve that by attaching a struct dma_fence_array
> -	 *   with all the new and old fences.
> -	 *
> -	 * - Alternatively the fence can be set directly, which leaves the
> -	 *   shared fences unchanged. To guarantee that no fences are lost, this
> -	 *   new fence must signal only after the previous exclusive fence has
> -	 *   signalled. Since the shared fences are staying intact, it is not
> -	 *   necessary to maintain any ordering against those. If semantically
> -	 *   only a new access is added without actually treating the previous
> -	 *   one as a dependency the exclusive fences can be strung together
> -	 *   using struct dma_fence_chain.
> +	 * To guarantee that no fences are lost, this new fence must signal
> +	 * only after the previous exclusive fence has signalled. If
> +	 * semantically only a new access is added without actually treating the
> +	 * previous one as a dependency the exclusive fences can be strung
> +	 * together using struct dma_fence_chain.
>  	 *
>  	 * Note that actual semantics of what an exclusive or shared fence mean
>  	 * is defined by the user, for reservation objects shared across drivers
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2
  2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
                   ` (23 preceding siblings ...)
  2022-03-23 13:09 ` Daniel Vetter
@ 2022-03-23 15:55 ` Felix Kuehling
  2022-03-23 15:57   ` Christian König
  24 siblings, 1 reply; 73+ messages in thread
From: Felix Kuehling @ 2022-03-23 15:55 UTC (permalink / raw)
  To: Christian König, daniel.vetter, dri-devel; +Cc: Christian König


On 2022-03-21 at 09:58, Christian König wrote:
> This function allows to replace fences from the shared fence list when
> we can gurantee that the operation represented by the original fence has
> finished or no accesses to the resources protected by the dma_resv
> object any more when the new fence finishes.
>
> Then use this function in the amdkfd code when BOs are unmapped from the
> process.
>
> v2: add an example when this is usefull.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/dma-buf/dma-resv.c                    | 45 +++++++++++++++++
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++----------------
>   include/linux/dma-resv.h                      |  2 +
>   3 files changed, 54 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index b51416405e86..509060861cf3 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -289,6 +289,51 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>   }
>   EXPORT_SYMBOL(dma_resv_add_shared_fence);
>   
> +/**
> + * dma_resv_replace_fences - replace fences in the dma_resv obj
> + * @obj: the reservation object
> + * @context: the context of the fences to replace
> + * @replacement: the new fence to use instead
> + *
> + * Replace fences with a specified context with a new fence. Only valid if the
> + * operation represented by the original fence has no longer access to the
> + * resources represented by the dma_resv object when the new fence completes.
> + *
> + * And example for using this is replacing a preemption fence with a page table
> + * update fence which makes the resource inaccessible.
> + */
> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> +			     struct dma_fence *replacement)
> +{
> +	struct dma_resv_list *list;
> +	struct dma_fence *old;
> +	unsigned int i;
> +
> +	dma_resv_assert_held(obj);
> +
> +	write_seqcount_begin(&obj->seq);
> +
> +	old = dma_resv_excl_fence(obj);
> +	if (old->context == context) {
> +		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
> +		dma_fence_put(old);
> +	}
> +
> +	list = dma_resv_shared_list(obj);
> +	for (i = 0; list && i < list->shared_count; ++i) {
> +		old = rcu_dereference_protected(list->shared[i],
> +						dma_resv_held(obj));
> +		if (old->context != context)
> +			continue;
> +
> +		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
> +		dma_fence_put(old);
> +	}
> +
> +	write_seqcount_end(&obj->seq);
> +}
> +EXPORT_SYMBOL(dma_resv_replace_fences);
> +
>   /**
>    * dma_resv_add_excl_fence - Add an exclusive fence.
>    * @obj: the reservation object
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f9bab963a948..b6f266f612ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>   static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>   					struct amdgpu_amdkfd_fence *ef)
>   {
> -	struct dma_resv *resv = bo->tbo.base.resv;
> -	struct dma_resv_list *old, *new;
> -	unsigned int i, j, k;
> +	struct dma_fence *replacement;
>   
>   	if (!ef)
>   		return -EINVAL;
>   
> -	old = dma_resv_shared_list(resv);
> -	if (!old)
> -		return 0;
> -
> -	new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
> -	if (!new)
> -		return -ENOMEM;
> -
> -	/* Go through all the shared fences in the resevation object and sort
> -	 * the interesting ones to the end of the list.
> +	/* TODO: Instead of block before we should use the fence of the page
> +	 * table update and TLB flush here directly.

I don't understand this comment. Which block are you referring to? And 
what does this have to do with TLB flushing? Maybe you're thinking of a 
specific use case of amdgpu_amdkfd_remove_eviction_fence for page table 
BOs. But that's not the only case where we need to remove eviction fences.

Other than that, this patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


>   	 */
> -	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
> -		struct dma_fence *f;
> -
> -		f = rcu_dereference_protected(old->shared[i],
> -					      dma_resv_held(resv));
> -
> -		if (f->context == ef->base.context)
> -			RCU_INIT_POINTER(new->shared[--j], f);
> -		else
> -			RCU_INIT_POINTER(new->shared[k++], f);
> -	}
> -	new->shared_max = old->shared_max;
> -	new->shared_count = k;
> -
> -	/* Install the new fence list, seqcount provides the barriers */
> -	write_seqcount_begin(&resv->seq);
> -	RCU_INIT_POINTER(resv->fence, new);
> -	write_seqcount_end(&resv->seq);
> -
> -	/* Drop the references to the removed fences or move them to ef_list */
> -	for (i = j; i < old->shared_count; ++i) {
> -		struct dma_fence *f;
> -
> -		f = rcu_dereference_protected(new->shared[i],
> -					      dma_resv_held(resv));
> -		dma_fence_put(f);
> -	}
> -	kfree_rcu(old, rcu);
> -
> +	replacement = dma_fence_get_stub();
> +	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
> +				replacement);
> +	dma_fence_put(replacement);
>   	return 0;
>   }
>   
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index afdfdfac729f..3f53177bdb46 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -468,6 +468,8 @@ void dma_resv_init(struct dma_resv *obj);
>   void dma_resv_fini(struct dma_resv *obj);
>   int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
>   void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> +			     struct dma_fence *fence);
>   void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
>   int dma_resv_get_fences(struct dma_resv *obj, bool write,
>   			unsigned int *num_fences, struct dma_fence ***fences);

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2
  2022-03-23 15:55 ` Felix Kuehling
@ 2022-03-23 15:57   ` Christian König
  0 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-03-23 15:57 UTC (permalink / raw)
  To: Felix Kuehling, Christian König, daniel.vetter, dri-devel

On 23.03.22 at 16:55, Felix Kuehling wrote:
>
> On 2022-03-21 at 09:58, Christian König wrote:
>> This function allows to replace fences from the shared fence list when
>> we can gurantee that the operation represented by the original fence has
>> finished or no accesses to the resources protected by the dma_resv
>> object any more when the new fence finishes.
>>
>> Then use this function in the amdkfd code when BOs are unmapped from the
>> process.
>>
>> v2: add an example when this is usefull.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/dma-buf/dma-resv.c                    | 45 +++++++++++++++++
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 49 +++----------------
>>   include/linux/dma-resv.h                      |  2 +
>>   3 files changed, 54 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
>> index b51416405e86..509060861cf3 100644
>> --- a/drivers/dma-buf/dma-resv.c
>> +++ b/drivers/dma-buf/dma-resv.c
>> @@ -289,6 +289,51 @@ void dma_resv_add_shared_fence(struct dma_resv 
>> *obj, struct dma_fence *fence)
>>   }
>>   EXPORT_SYMBOL(dma_resv_add_shared_fence);
>>   +/**
>> + * dma_resv_replace_fences - replace fences in the dma_resv obj
>> + * @obj: the reservation object
>> + * @context: the context of the fences to replace
>> + * @replacement: the new fence to use instead
>> + *
>> + * Replace fences with a specified context with a new fence. Only 
>> valid if the
>> + * operation represented by the original fence has no longer access 
>> to the
>> + * resources represented by the dma_resv object when the new fence 
>> completes.
>> + *
>> + * And example for using this is replacing a preemption fence with a 
>> page table
>> + * update fence which makes the resource inaccessible.
>> + */
>> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
>> +                 struct dma_fence *replacement)
>> +{
>> +    struct dma_resv_list *list;
>> +    struct dma_fence *old;
>> +    unsigned int i;
>> +
>> +    dma_resv_assert_held(obj);
>> +
>> +    write_seqcount_begin(&obj->seq);
>> +
>> +    old = dma_resv_excl_fence(obj);
>> +    if (old->context == context) {
>> +        RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
>> +        dma_fence_put(old);
>> +    }
>> +
>> +    list = dma_resv_shared_list(obj);
>> +    for (i = 0; list && i < list->shared_count; ++i) {
>> +        old = rcu_dereference_protected(list->shared[i],
>> +                        dma_resv_held(obj));
>> +        if (old->context != context)
>> +            continue;
>> +
>> +        rcu_assign_pointer(list->shared[i], 
>> dma_fence_get(replacement));
>> +        dma_fence_put(old);
>> +    }
>> +
>> +    write_seqcount_end(&obj->seq);
>> +}
>> +EXPORT_SYMBOL(dma_resv_replace_fences);
>> +
>>   /**
>>    * dma_resv_add_excl_fence - Add an exclusive fence.
>>    * @obj: the reservation object
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index f9bab963a948..b6f266f612ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct 
>> amdgpu_bo *bo)
>>   static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>>                       struct amdgpu_amdkfd_fence *ef)
>>   {
>> -    struct dma_resv *resv = bo->tbo.base.resv;
>> -    struct dma_resv_list *old, *new;
>> -    unsigned int i, j, k;
>> +    struct dma_fence *replacement;
>>         if (!ef)
>>           return -EINVAL;
>>   -    old = dma_resv_shared_list(resv);
>> -    if (!old)
>> -        return 0;
>> -
>> -    new = kmalloc(struct_size(new, shared, old->shared_max), 
>> GFP_KERNEL);
>> -    if (!new)
>> -        return -ENOMEM;
>> -
>> -    /* Go through all the shared fences in the resevation object and 
>> sort
>> -     * the interesting ones to the end of the list.
>> +    /* TODO: Instead of block before we should use the fence of the 
>> page
>> +     * table update and TLB flush here directly.
>
> I don't understand this comment. Which block are you referring to? And 
> what does this have to do with TLB flushing? Maybe you're thinking of 
> a specific use case of amdgpu_amdkfd_remove_eviction_fence for page 
> table BOs. But that's not the only case where we need to remove 
> eviction fences.

More or less correct. See, the eviction fence can be removed because we 
can make sure that the hardware operation represented by it no longer has 
access to the resources protected by the dma_resv object.

In other words, when we unmap the BO from the process, this unmap 
operation results in a fence we can potentially use to replace the 
eviction fence here.
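
Roughly like this, as a sketch only (the unmap_fence parameter is 
hypothetical, the code as posted still passes the stub fence):

static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
					       struct amdgpu_amdkfd_fence *ef,
					       struct dma_fence *unmap_fence)
{
	if (!ef)
		return -EINVAL;

	/* Replace the eviction fence with the fence of the page table
	 * update / TLB flush that unmapped the BO, instead of a stub.
	 */
	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
				unmap_fence);
	return 0;
}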

>
> Other than that, this patch is
>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Thanks,
Christian.

>
>
>>        */
>> -    for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; 
>> ++i) {
>> -        struct dma_fence *f;
>> -
>> -        f = rcu_dereference_protected(old->shared[i],
>> -                          dma_resv_held(resv));
>> -
>> -        if (f->context == ef->base.context)
>> -            RCU_INIT_POINTER(new->shared[--j], f);
>> -        else
>> -            RCU_INIT_POINTER(new->shared[k++], f);
>> -    }
>> -    new->shared_max = old->shared_max;
>> -    new->shared_count = k;
>> -
>> -    /* Install the new fence list, seqcount provides the barriers */
>> -    write_seqcount_begin(&resv->seq);
>> -    RCU_INIT_POINTER(resv->fence, new);
>> -    write_seqcount_end(&resv->seq);
>> -
>> -    /* Drop the references to the removed fences or move them to 
>> ef_list */
>> -    for (i = j; i < old->shared_count; ++i) {
>> -        struct dma_fence *f;
>> -
>> -        f = rcu_dereference_protected(new->shared[i],
>> -                          dma_resv_held(resv));
>> -        dma_fence_put(f);
>> -    }
>> -    kfree_rcu(old, rcu);
>> -
>> +    replacement = dma_fence_get_stub();
>> +    dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
>> +                replacement);
>> +    dma_fence_put(replacement);
>>       return 0;
>>   }
>>   diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
>> index afdfdfac729f..3f53177bdb46 100644
>> --- a/include/linux/dma-resv.h
>> +++ b/include/linux/dma-resv.h
>> @@ -468,6 +468,8 @@ void dma_resv_init(struct dma_resv *obj);
>>   void dma_resv_fini(struct dma_resv *obj);
>>   int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
>>   void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
>> +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
>> +                 struct dma_fence *fence);
>>   void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
>>   int dma_resv_get_fences(struct dma_resv *obj, bool write,
>>               unsigned int *num_fences, struct dma_fence ***fences);


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 05/23] drm/etnaviv: stop using dma_resv_excl_fence
  2022-03-21 13:58 ` [PATCH 05/23] drm/etnaviv: stop using dma_resv_excl_fence Christian König
@ 2022-03-23 15:58   ` Lucas Stach
  0 siblings, 0 replies; 73+ messages in thread
From: Lucas Stach @ 2022-03-23 15:58 UTC (permalink / raw)
  To: Christian König, daniel.vetter, dri-devel
  Cc: Christian König, etnaviv, Russell King

Am Montag, dem 21.03.2022 um 14:58 +0100 schrieb Christian König:
> We can get the excl fence together with the shared ones as well.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Lucas Stach <l.stach@pengutronix.de>
> Cc: Russell King <linux+etnaviv@armlinux.org.uk>
> Cc: Christian Gmeiner <christian.gmeiner@gmail.com>
> Cc: etnaviv@lists.freedesktop.org
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_gem.h        |  1 -
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 14 +++++---------
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c      | 10 ----------
>  3 files changed, 5 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> index 98e60df882b6..f596d743baa3 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
> @@ -80,7 +80,6 @@ struct etnaviv_gem_submit_bo {
>  	u64 va;
>  	struct etnaviv_gem_object *obj;
>  	struct etnaviv_vram_mapping *mapping;
> -	struct dma_fence *excl;
>  	unsigned int nr_shared;
>  	struct dma_fence **shared;

Please rename this to something less specific, like fences or
dependencies. Now that the exclusive fence can be part of this array,
the naming is confusing.
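
Something along these lines, for example (just a sketch, the final
naming is of course up to you):

struct etnaviv_gem_submit_bo {
        /* ... other members unchanged ... */
        unsigned int nr_fences;
        struct dma_fence **fences;      /* may now include the exclusive fence */
};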

Otherwise the commit looks fine.
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>

>  };
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index 4eb00a0cb650..385ea141c23e 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -188,15 +188,11 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>  		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
>  			continue;
>  
> -		if (bo->flags & ETNA_SUBMIT_BO_WRITE) {
> -			ret = dma_resv_get_fences(robj, true, &bo->nr_shared,
> -						  &bo->shared);
> -			if (ret)
> -				return ret;
> -		} else {
> -			bo->excl = dma_fence_get(dma_resv_excl_fence(robj));
> -		}
> -
> +		ret = dma_resv_get_fences(robj,
> +					  bo->flags & ETNA_SUBMIT_BO_WRITE,
> +					  &bo->nr_shared, &bo->shared);
> +		if (ret)
> +			return ret;
>  	}
>  
>  	return ret;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index 35e5ef7dbdcc..59277dc62011 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -39,16 +39,6 @@ etnaviv_sched_dependency(struct drm_sched_job *sched_job,
>  		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
>  		int j;
>  
> -		if (bo->excl) {
> -			fence = bo->excl;
> -			bo->excl = NULL;
> -
> -			if (!dma_fence_is_signaled(fence))
> -				return fence;
> -
> -			dma_fence_put(fence);
> -		}
> -
>  		for (j = 0; j < bo->nr_shared; j++) {
>  			if (!bo->shared[j])
>  				continue;



^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 04/23] RDMA: use dma_resv_wait() instead of extracting the fence
  2022-03-23 13:22     ` Daniel Vetter
@ 2022-03-23 16:32       ` Jason Gunthorpe
  -1 siblings, 0 replies; 73+ messages in thread
From: Jason Gunthorpe @ 2022-03-23 16:32 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: Christian König, daniel.vetter, dri-devel,
	Christian König, Leon Romanovsky, Maor Gottlieb,
	Gal Pressman, linux-media, linaro-mm-sig

On Wed, Mar 23, 2022 at 02:22:01PM +0100, Daniel Vetter wrote:
> On Mon, Mar 21, 2022 at 02:58:37PM +0100, Christian König wrote:
> > Use dma_resv_wait() instead of extracting the exclusive fence and
> > waiting on it manually.
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Jason Gunthorpe <jgg@ziepe.ca>
> 
> Jason, can you ack this for merging through drm trees please?

Sure, it looks trivial, but I didn't see the whole series:

Acked-by: Jason Gunthorpe <jgg@nvidia.com>

Jason

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 04/23] RDMA: use dma_resv_wait() instead of extracting the fence
  2022-03-23 16:32       ` Jason Gunthorpe
@ 2022-03-23 17:34         ` Daniel Vetter
  -1 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-23 17:34 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christian König, dri-devel, Christian König,
	Leon Romanovsky, Maor Gottlieb, Gal Pressman, linux-media,
	linaro-mm-sig

On Wed, 23 Mar 2022 at 17:32, Jason Gunthorpe <jgg@ziepe.ca> wrote:
>
> On Wed, Mar 23, 2022 at 02:22:01PM +0100, Daniel Vetter wrote:
> > On Mon, Mar 21, 2022 at 02:58:37PM +0100, Christian König wrote:
> > > Use dma_resv_wait() instead of extracting the exclusive fence and
> > > waiting on it manually.
> > >
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > Cc: Jason Gunthorpe <jgg@ziepe.ca>
> >
> > Jason, can you ack this for merging through drm trees please?
>
> Sure, it looks trivial, but I didn't see the whole series:
>
> Acked-by: Jason Gunthorpe <jgg@nvidia.com>

The entire series reworks how dma_resv stores fences (and what exactly
they mean), which is why we need to get users away from some of these
low-level accessors and towards functions at a slightly higher level.
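
For illustration, the pattern this boils down to looks roughly like this
(simplified sketch with a hypothetical "obj", using the current
bool-based dma_resv_wait_timeout() signature; the RDMA patch above is
one instance of it):

        struct dma_fence *excl;
        long ret;

        /* Before: the driver digs the exclusive fence out itself */
        excl = dma_resv_excl_fence(obj->resv);
        if (excl)
                ret = dma_fence_wait(excl, false);

        /* After: the dma_resv core decides which fences are relevant */
        ret = dma_resv_wait_timeout(obj->resv, false, false,
                                    MAX_SCHEDULE_TIMEOUT);
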
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3
  2022-03-21 13:58 ` [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3 Christian König
@ 2022-03-28 17:14   ` Daniel Vetter
  2022-03-31 12:07     ` Christian König
  0 siblings, 1 reply; 73+ messages in thread
From: Daniel Vetter @ 2022-03-28 17:14 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:45PM +0100, Christian König wrote:
> Audit all the users of dma_resv_add_excl_fence() and make sure they
> reserve a shared slot also when only trying to add an exclusive fence.
> 
> This is the next step towards handling the exclusive fence like a
> shared one.
> 
> v2: fix missed case in amdgpu
> v3: and two more radeon, rename function
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-resv.c                    | 10 +--
>  drivers/dma-buf/st-dma-resv.c                 | 64 +++++++++----------
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  8 +++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  4 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c          |  2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  8 +--
>  drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |  3 +-
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 10 ++-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |  6 +-
>  .../drm/i915/gem/selftests/i915_gem_migrate.c |  5 +-
>  drivers/gpu/drm/i915/i915_vma.c               |  8 ++-
>  .../drm/i915/selftests/intel_memory_region.c  |  7 ++
>  drivers/gpu/drm/lima/lima_gem.c               | 10 ++-
>  drivers/gpu/drm/msm/msm_gem_submit.c          | 18 +++---
>  drivers/gpu/drm/nouveau/nouveau_fence.c       |  8 +--
>  drivers/gpu/drm/panfrost/panfrost_job.c       |  4 ++
>  drivers/gpu/drm/qxl/qxl_release.c             |  2 +-
>  drivers/gpu/drm/radeon/radeon_cs.c            |  4 ++
>  drivers/gpu/drm/radeon/radeon_object.c        |  8 +++
>  drivers/gpu/drm/radeon/radeon_vm.c            |  2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                  |  4 +-
>  drivers/gpu/drm/ttm/ttm_bo_util.c             | 12 +++-
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c        | 15 ++---
>  drivers/gpu/drm/v3d/v3d_gem.c                 | 15 +++--
>  drivers/gpu/drm/vc4/vc4_gem.c                 |  2 +-
>  drivers/gpu/drm/vgem/vgem_fence.c             | 12 ++--
>  drivers/gpu/drm/virtio/virtgpu_gem.c          |  9 +++
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            | 16 +++--
>  include/linux/dma-resv.h                      |  4 +-
>  30 files changed, 171 insertions(+), 113 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 4b12141579e2..78a32da2cb0b 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -152,7 +152,7 @@ static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
>  }
>  
>  /**
> - * dma_resv_reserve_shared - Reserve space to add shared fences to
> + * dma_resv_reserve_fences - Reserve space to add shared fences to
>   * a dma_resv.
>   * @obj: reservation object
>   * @num_fences: number of fences we want to add
> @@ -167,7 +167,7 @@ static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
>   * RETURNS
>   * Zero for success, or -errno
>   */
> -int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences)
> +int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>  {
>  	struct dma_resv_list *old, *new;
>  	unsigned int i, j, k, max;
> @@ -230,7 +230,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences)
>  
>  	return 0;
>  }
> -EXPORT_SYMBOL(dma_resv_reserve_shared);
> +EXPORT_SYMBOL(dma_resv_reserve_fences);
>  
>  #ifdef CONFIG_DEBUG_MUTEXES
>  /**
> @@ -238,7 +238,7 @@ EXPORT_SYMBOL(dma_resv_reserve_shared);
>   * @obj: the dma_resv object to reset
>   *
>   * Reset the number of pre-reserved shared slots to test that drivers do
> - * correct slot allocation using dma_resv_reserve_shared(). See also
> + * correct slot allocation using dma_resv_reserve_fences(). See also
>   * &dma_resv_list.shared_max.
>   */
>  void dma_resv_reset_shared_max(struct dma_resv *obj)
> @@ -260,7 +260,7 @@ EXPORT_SYMBOL(dma_resv_reset_shared_max);
>   * @fence: the shared fence to add
>   *
>   * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
> - * dma_resv_reserve_shared() has been called.
> + * dma_resv_reserve_fences() has been called.
>   *
>   * See also &dma_resv.fence for a discussion of the semantics.
>   */
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index cbe999c6e7a6..d2e61f6ae989 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -75,17 +75,16 @@ static int test_signaling(void *arg, bool shared)
>  		goto err_free;
>  	}
>  
> -	if (shared) {
> -		r = dma_resv_reserve_shared(&resv, 1);
> -		if (r) {
> -			pr_err("Resv shared slot allocation failed\n");
> -			goto err_unlock;
> -		}
> +	r = dma_resv_reserve_fences(&resv, 1);
> +	if (r) {
> +		pr_err("Resv shared slot allocation failed\n");
> +		goto err_unlock;
> +	}
>  
> +	if (shared)
>  		dma_resv_add_shared_fence(&resv, f);
> -	} else {
> +	else
>  		dma_resv_add_excl_fence(&resv, f);
> -	}
>  
>  	if (dma_resv_test_signaled(&resv, shared)) {
>  		pr_err("Resv unexpectedly signaled\n");
> @@ -134,17 +133,16 @@ static int test_for_each(void *arg, bool shared)
>  		goto err_free;
>  	}
>  
> -	if (shared) {
> -		r = dma_resv_reserve_shared(&resv, 1);
> -		if (r) {
> -			pr_err("Resv shared slot allocation failed\n");
> -			goto err_unlock;
> -		}
> +	r = dma_resv_reserve_fences(&resv, 1);
> +	if (r) {
> +		pr_err("Resv shared slot allocation failed\n");
> +		goto err_unlock;
> +	}
>  
> +	if (shared)
>  		dma_resv_add_shared_fence(&resv, f);
> -	} else {
> +	else
>  		dma_resv_add_excl_fence(&resv, f);
> -	}
>  
>  	r = -ENOENT;
>  	dma_resv_for_each_fence(&cursor, &resv, shared, fence) {
> @@ -206,18 +204,17 @@ static int test_for_each_unlocked(void *arg, bool shared)
>  		goto err_free;
>  	}
>  
> -	if (shared) {
> -		r = dma_resv_reserve_shared(&resv, 1);
> -		if (r) {
> -			pr_err("Resv shared slot allocation failed\n");
> -			dma_resv_unlock(&resv);
> -			goto err_free;
> -		}
> +	r = dma_resv_reserve_fences(&resv, 1);
> +	if (r) {
> +		pr_err("Resv shared slot allocation failed\n");
> +		dma_resv_unlock(&resv);
> +		goto err_free;
> +	}
>  
> +	if (shared)
>  		dma_resv_add_shared_fence(&resv, f);
> -	} else {
> +	else
>  		dma_resv_add_excl_fence(&resv, f);
> -	}
>  	dma_resv_unlock(&resv);
>  
>  	r = -ENOENT;
> @@ -290,18 +287,17 @@ static int test_get_fences(void *arg, bool shared)
>  		goto err_resv;
>  	}
>  
> -	if (shared) {
> -		r = dma_resv_reserve_shared(&resv, 1);
> -		if (r) {
> -			pr_err("Resv shared slot allocation failed\n");
> -			dma_resv_unlock(&resv);
> -			goto err_resv;
> -		}
> +	r = dma_resv_reserve_fences(&resv, 1);
> +	if (r) {
> +		pr_err("Resv shared slot allocation failed\n");
> +		dma_resv_unlock(&resv);
> +		goto err_resv;
> +	}
>  
> +	if (shared)
>  		dma_resv_add_shared_fence(&resv, f);
> -	} else {
> +	else
>  		dma_resv_add_excl_fence(&resv, f);
> -	}
>  	dma_resv_unlock(&resv);
>  
>  	r = dma_resv_get_fences(&resv, shared, &i, &fences);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b6f266f612ea..7de8f67f7dde 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1220,7 +1220,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
>  				  AMDGPU_FENCE_OWNER_KFD, false);
>  	if (ret)
>  		goto wait_pd_fail;
> -	ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1);
> +	ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
>  	if (ret)
>  		goto reserve_shared_fail;
>  	amdgpu_bo_fence(vm->root.bo,
> @@ -2530,7 +2530,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
>  	 * Add process eviction fence to bo so they can
>  	 * evict each other.
>  	 */
> -	ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
> +	ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
>  	if (ret)
>  		goto reserve_shared_fail;
>  	amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index ea0cde4904f0..2f808decd8d9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1384,6 +1384,14 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>  		     bool shared)
>  {
>  	struct dma_resv *resv = bo->tbo.base.resv;
> +	int r;
> +
> +	r = dma_resv_reserve_fences(resv, 1);

This is quite a hack, but I did scroll through all the callers of
amdgpu_bo_fence and I think it's fine - i.e. no recursion into the
shrinker from a calling context where recursion into shrinker/memalloc
isn't allowed.

But it ain't pretty :-/

> +	if (r) {
> +		/* As last resort on OOM we block for the fence */
> +		dma_fence_wait(fence, false);
> +		return;
> +	}
>  
>  	if (shared)
>  		dma_resv_add_shared_fence(resv, fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b37fc7d7d2c7..0ab85280e8ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2968,7 +2968,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>  	if (r)
>  		goto error_free_root;
>  
> -	r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1);
> +	r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
>  	if (r)
>  		goto error_unreserve;
>  
> @@ -3411,7 +3411,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>  		value = 0;
>  	}
>  
> -	r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
> +	r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
>  	if (r) {
>  		pr_debug("failed %d to reserve fence slot\n", r);
>  		goto error_unlock;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index f2805ba74c80..7b878221f1d3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -543,7 +543,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
>  		goto reserve_bo_failed;
>  	}
>  
> -	r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
> +	r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
>  	if (r) {
>  		pr_debug("failed %d to reserve bo\n", r);
>  		amdgpu_bo_unreserve(bo);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index 385ea141c23e..b808ddb9da48 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -179,11 +179,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>  		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
>  		struct dma_resv *robj = bo->obj->base.resv;
>  
> -		if (!(bo->flags & ETNA_SUBMIT_BO_WRITE)) {
> -			ret = dma_resv_reserve_shared(robj, 1);
> -			if (ret)
> -				return ret;
> -		}
> +		ret = dma_resv_reserve_fences(robj, 1);
> +		if (ret)
> +			return ret;
>  
>  		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
>  			continue;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> index 8a248003dfae..8a2223eb0ba9 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> @@ -106,7 +106,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  	trace_i915_gem_object_clflush(obj);
>  
>  	clflush = NULL;
> -	if (!(flags & I915_CLFLUSH_SYNC))
> +	if (!(flags & I915_CLFLUSH_SYNC) &&
> +	    dma_resv_reserve_fences(obj->base.resv, 1) == 0)
>  		clflush = clflush_work_create(obj);
>  	if (clflush) {
>  		i915_sw_fence_await_reservation(&clflush->base.chain,


> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 3a5b247be738..e8eb6ee83f24 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -994,11 +994,9 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
>  			}
>  		}
>  
> -		if (!(ev->flags & EXEC_OBJECT_WRITE)) {
> -			err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
> -			if (err)
> -				return err;
> -		}
> +		err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
> +		if (err)
> +			return err;
>  
>  		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
>  			   eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
> @@ -2297,7 +2295,7 @@ static int eb_parse(struct i915_execbuffer *eb)
>  		goto err_trampoline;
>  	}
>  
> -	err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
> +	err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
>  	if (err)
>  		goto err_trampoline;
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> index ee9612a3ee5e..4de6500f3c55 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> @@ -596,7 +596,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
>  	assert_object_held(src);
>  	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
>  
> -	ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
> +	ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
> +	if (ret)
> +		return ret;
> +
> +	ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);

Can't we just reserve 2 slots instead of doing this 2x?

>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
> index ecb691c81d1e..f9b369ed4b50 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
> @@ -180,7 +180,10 @@ static int igt_lmem_pages_migrate(void *arg)
>  					  i915_gem_object_is_lmem(obj),
>  					  0xdeadbeaf, &rq);
>  		if (rq) {
> -			dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
> +			err = dma_resv_reserve_fences(obj->base.resv, 1);
> +			if (!err)
> +				dma_resv_add_excl_fence(obj->base.resv,
> +							&rq->fence);
>  			i915_request_put(rq);
>  		}
>  		if (err)
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index c0d6d5526abe..fe9f89289418 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1633,6 +1633,12 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>  			intel_frontbuffer_put(front);
>  		}
>  
> +		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
> +			err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
> +			if (unlikely(err))
> +				return err;
> +		}
> +
>  		if (fence) {
>  			dma_resv_add_excl_fence(vma->obj->base.resv, fence);
>  			obj->write_domain = I915_GEM_DOMAIN_RENDER;
> @@ -1640,7 +1646,7 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>  		}
>  	} else {
>  		if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
> -			err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
> +			err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
>  			if (unlikely(err))
>  				return err;
>  		}
> diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> index 7acba1d2135e..7f40502b2c1e 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
> @@ -895,6 +895,13 @@ static int igt_lmem_write_cpu(void *arg)
>  	}
>  
>  	i915_gem_object_lock(obj, NULL);
> +
> +	err = dma_resv_reserve_fences(obj->base.resv, 1);
> +	if (err) {
> +		i915_gem_object_unlock(obj);
> +		goto out_put;
> +	}
> +
>  	/* Put the pages into a known state -- from the gpu for added fun */
>  	intel_engine_pm_get(engine);
>  	err = intel_context_migrate_clear(engine->gt->migrate.context, NULL,
> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> index 6a6f6f2ead75..9435a3ca71c8 100644
> --- a/drivers/gpu/drm/lima/lima_gem.c
> +++ b/drivers/gpu/drm/lima/lima_gem.c
> @@ -256,13 +256,11 @@ int lima_gem_get_info(struct drm_file *file, u32 handle, u32 *va, u64 *offset)
>  static int lima_gem_sync_bo(struct lima_sched_task *task, struct lima_bo *bo,
>  			    bool write, bool explicit)
>  {
> -	int err = 0;
> +	int err;
>  
> -	if (!write) {
> -		err = dma_resv_reserve_shared(lima_bo_resv(bo), 1);
> -		if (err)
> -			return err;
> -	}
> +	err = dma_resv_reserve_fences(lima_bo_resv(bo), 1);
> +	if (err)
> +		return err;
>  
>  	/* explicit sync use user passed dep fence */
>  	if (explicit)
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 6cfa984dee6a..993dbcd7a586 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -320,16 +320,14 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
>  		struct drm_gem_object *obj = &submit->bos[i].obj->base;
>  		bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
>  
> -		if (!write) {
> -			/* NOTE: _reserve_shared() must happen before
> -			 * _add_shared_fence(), which makes this a slightly
> -			 * strange place to call it.  OTOH this is a
> -			 * convenient can-fail point to hook it in.
> -			 */
> -			ret = dma_resv_reserve_shared(obj->resv, 1);
> -			if (ret)
> -				return ret;
> -		}
> +		/* NOTE: _reserve_shared() must happen before
> +		 * _add_shared_fence(), which makes this a slightly
> +		 * strange place to call it.  OTOH this is a
> +		 * convenient can-fail point to hook it in.
> +		 */
> +		ret = dma_resv_reserve_fences(obj->resv, 1);
> +		if (ret)
> +			return ret;
>  
>  		/* exclusive fences must be ordered */
>  		if (no_implicit && !write)
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index a3a04e0d76ec..0268259e97eb 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -346,11 +346,9 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
>  	struct dma_resv *resv = nvbo->bo.base.resv;
>  	int i, ret;
>  
> -	if (!exclusive) {
> -		ret = dma_resv_reserve_shared(resv, 1);
> -		if (ret)
> -			return ret;
> -	}
> +	ret = dma_resv_reserve_fences(resv, 1);
> +	if (ret)
> +		return ret;
>  
>  	/* Waiting for the exclusive fence first causes performance regressions
>  	 * under some circumstances. So manually wait for the shared ones first.
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index a6925dbb6224..c34114560e49 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -247,6 +247,10 @@ static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
>  	int i, ret;
>  
>  	for (i = 0; i < bo_count; i++) {
> +		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
> +		if (ret)
> +			return ret;
> +
>  		/* panfrost always uses write mode in its current uapi */
>  		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],

I wonder whether we shouldn't move the dma-resv reserving into some shared
helpers eventually ...

>  							      true);
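
A shared helper of the kind suggested above could look roughly like this
(purely a sketch, the name and exact shape are invented here):

static int drm_sched_job_prepare_resv(struct drm_sched_job *job,
                                      struct drm_gem_object **bos,
                                      unsigned int bo_count, bool write)
{
        unsigned int i;
        int ret;

        for (i = 0; i < bo_count; i++) {
                /* reserve the slot the driver will later fill with the
                 * job's own fence
                 */
                ret = dma_resv_reserve_fences(bos[i]->resv, 1);
                if (ret)
                        return ret;

                ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
                                                              write);
                if (ret)
                        return ret;
        }

        return 0;
}
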
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index 469979cd0341..cde1e8ddaeaa 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -200,7 +200,7 @@ static int qxl_release_validate_bo(struct qxl_bo *bo)
>  			return ret;
>  	}
>  
> -	ret = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
> +	ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
> index 9ed2b2700e0a..446f7bae54c4 100644
> --- a/drivers/gpu/drm/radeon/radeon_cs.c
> +++ b/drivers/gpu/drm/radeon/radeon_cs.c
> @@ -535,6 +535,10 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
>  			return r;
>  
>  		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
> +
> +		r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
> +		if (r)
> +			return r;
>  	}
>  
>  	return radeon_vm_clear_invalids(rdev, vm);
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index b827b87aefe2..afca4bf59a8d 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -783,6 +783,14 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
>  		     bool shared)
>  {
>  	struct dma_resv *resv = bo->tbo.base.resv;
> +	int r;
> +
> +	r = dma_resv_reserve_fences(resv, 1);
> +	if (r) {
> +		/* As last resort on OOM we block for the fence */
> +		dma_fence_wait(&fence->base, false);
> +		return;
> +	}
>  
>  	if (shared)
>  		dma_resv_add_shared_fence(resv, &fence->base);
> diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
> index bb53016f3138..987cabbf1318 100644
> --- a/drivers/gpu/drm/radeon/radeon_vm.c
> +++ b/drivers/gpu/drm/radeon/radeon_vm.c
> @@ -831,7 +831,7 @@ static int radeon_vm_update_ptes(struct radeon_device *rdev,
>  		int r;
>  
>  		radeon_sync_resv(rdev, &ib->sync, pt->tbo.base.resv, true);
> -		r = dma_resv_reserve_shared(pt->tbo.base.resv, 1);
> +		r = dma_resv_reserve_fences(pt->tbo.base.resv, 1);
>  		if (r)
>  			return r;
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index db3dc7ef5382..1dd6f13bb03c 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>  
>  	dma_resv_add_shared_fence(bo->base.resv, fence);
>  
> -	ret = dma_resv_reserve_shared(bo->base.resv, 1);
> +	ret = dma_resv_reserve_fences(bo->base.resv, 1);
>  	if (unlikely(ret)) {
>  		dma_fence_put(fence);
>  		return ret;
> @@ -821,7 +821,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
>  	bool type_found = false;
>  	int i, ret;
>  
> -	ret = dma_resv_reserve_shared(bo->base.resv, 1);
> +	ret = dma_resv_reserve_fences(bo->base.resv, 1);
>  	if (unlikely(ret))
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 544a84fa6589..862d2f22412a 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -221,9 +221,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>  
>  	fbo->base = *bo;
>  
> -	ttm_bo_get(bo);
> -	fbo->bo = bo;
> -
>  	/**
>  	 * Fix up members that we shouldn't copy directly:
>  	 * TODO: Explicit member copy would probably be better here.
> @@ -251,6 +248,15 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>  	ret = dma_resv_trylock(&fbo->base.base._resv);
>  	WARN_ON(!ret);
>  
> +	ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1);
> +	if (ret) {
> +		kfree(fbo);
> +		return ret;
> +	}
> +
> +	ttm_bo_get(bo);
> +	fbo->bo = bo;
> +
>  	ttm_bo_move_to_lru_tail_unlocked(&fbo->base);
>  
>  	*new_obj = &fbo->base;
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 071c48d672c6..789c645f004e 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -90,6 +90,7 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>  
>  	list_for_each_entry(entry, list, head) {
>  		struct ttm_buffer_object *bo = entry->bo;
> +		unsigned int num_fences;
>  
>  		ret = ttm_bo_reserve(bo, intr, (ticket == NULL), ticket);
>  		if (ret == -EALREADY && dups) {
> @@ -100,12 +101,10 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>  			continue;
>  		}
>  
> +		num_fences = min(entry->num_shared, 1u);
>  		if (!ret) {
> -			if (!entry->num_shared)
> -				continue;
> -
> -			ret = dma_resv_reserve_shared(bo->base.resv,
> -								entry->num_shared);
> +			ret = dma_resv_reserve_fences(bo->base.resv,
> +						      num_fences);
>  			if (!ret)
>  				continue;
>  		}
> @@ -120,9 +119,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>  			ret = ttm_bo_reserve_slowpath(bo, intr, ticket);
>  		}
>  
> -		if (!ret && entry->num_shared)
> -			ret = dma_resv_reserve_shared(bo->base.resv,
> -								entry->num_shared);
> +		if (!ret)
> +			ret = dma_resv_reserve_fences(bo->base.resv,
> +						      num_fences);
>  
>  		if (unlikely(ret != 0)) {
>  			if (ticket) {

I didn't find the corresponding reserve for the dma_resv_add_excl_fence()
in ttm_bo_move_accel_cleanup(). Was that an oversight?
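
If it is an oversight, the fix would presumably be the usual pattern,
i.e. something like the following before the add_excl call (a sketch
only, not checked against the actual control flow of that function):

        ret = dma_resv_reserve_fences(bo->base.resv, 1);
        if (ret)
                return ret;

        dma_resv_add_excl_fence(bo->base.resv, fence);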

> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 92bc0faee84f..961812d33827 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -259,16 +259,21 @@ v3d_lock_bo_reservations(struct v3d_job *job,
>  		return ret;
>  
>  	for (i = 0; i < job->bo_count; i++) {
> +		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
> +		if (ret)
> +			goto fail;
> +
>  		ret = drm_sched_job_add_implicit_dependencies(&job->base,
>  							      job->bo[i], true);
> -		if (ret) {
> -			drm_gem_unlock_reservations(job->bo, job->bo_count,
> -						    acquire_ctx);
> -			return ret;
> -		}
> +		if (ret)
> +			goto fail;
>  	}
>  
>  	return 0;
> +
> +fail:
> +	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
> +	return ret;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 4abf10b66fe8..594bd6bb00d2 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -644,7 +644,7 @@ vc4_lock_bo_reservations(struct drm_device *dev,
>  	for (i = 0; i < exec->bo_count; i++) {
>  		bo = &exec->bo[i]->base;
>  
> -		ret = dma_resv_reserve_shared(bo->resv, 1);
> +		ret = dma_resv_reserve_fences(bo->resv, 1);
>  		if (ret) {
>  			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
>  			return ret;

v3d and vc4 are missing in the conversion. I think for both you need to
add it before the call to drm_sched_job_add_implicit_dependencies like
with etnaviv.

> diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
> index bd6f75285fd9..2ddbebca87d9 100644
> --- a/drivers/gpu/drm/vgem/vgem_fence.c
> +++ b/drivers/gpu/drm/vgem/vgem_fence.c
> @@ -157,12 +157,14 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>  	}
>  
>  	/* Expose the fence via the dma-buf */
> -	ret = 0;
>  	dma_resv_lock(resv, NULL);
> -	if (arg->flags & VGEM_FENCE_WRITE)
> -		dma_resv_add_excl_fence(resv, fence);
> -	else if ((ret = dma_resv_reserve_shared(resv, 1)) == 0)
> -		dma_resv_add_shared_fence(resv, fence);
> +	ret = dma_resv_reserve_fences(resv, 1);
> +	if (!ret) {
> +		if (arg->flags & VGEM_FENCE_WRITE)
> +			dma_resv_add_excl_fence(resv, fence);
> +		else
> +			dma_resv_add_shared_fence(resv, fence);
> +	}
>  	dma_resv_unlock(resv);
>  
>  	/* Record the fence in our idr for later signaling */
> diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
> index 48d3c9955f0d..1820ca6cf673 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_gem.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
> @@ -214,6 +214,7 @@ void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs,
>  
>  int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
>  {
> +	unsigned int i;
>  	int ret;
>  
>  	if (objs->nents == 1) {
> @@ -222,6 +223,14 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
>  		ret = drm_gem_lock_reservations(objs->objs, objs->nents,
>  						&objs->ticket);
>  	}
> +	if (ret)
> +		return ret;
> +
> +	for (i = 0; i < objs->nents; ++i) {
> +		ret = dma_resv_reserve_fences(objs->objs[i]->resv, 1);

I think you could stuff this into the same loop, but it also probably
doesn't matter.

> +		if (ret)
> +			return ret;
> +	}
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index 31aecc46624b..fe13aa8b4a64 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -747,16 +747,22 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
>  			 struct vmw_fence_obj *fence)
>  {
>  	struct ttm_device *bdev = bo->bdev;
> -
>  	struct vmw_private *dev_priv =
>  		container_of(bdev, struct vmw_private, bdev);
> +	int ret;
>  
> -	if (fence == NULL) {
> +	if (fence == NULL)
>  		vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
> +	else
> +		dma_fence_get(&fence->base);
> +
> +	ret = dma_resv_reserve_fences(bo->base.resv, 1);
> +	if (!ret)
>  		dma_resv_add_excl_fence(bo->base.resv, &fence->base);
> -		dma_fence_put(&fence->base);
> -	} else
> -		dma_resv_add_excl_fence(bo->base.resv, &fence->base);
> +	else
> +		/* Last resort fallback when we are OOM */
> +		dma_fence_wait(&fence->base, false);
> +	dma_fence_put(&fence->base);
>  }
>  
>  
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index ecb697d4d861..5fa04d0fccad 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -117,7 +117,7 @@ struct dma_resv {
>  	 * A new fence is added by calling dma_resv_add_shared_fence(). Since
>  	 * this often needs to be done past the point of no return in command
>  	 * submission it cannot fail, and therefore sufficient slots need to be
> -	 * reserved by calling dma_resv_reserve_shared().
> +	 * reserved by calling dma_resv_reserve_fences().
>  	 *
>  	 * Note that actual semantics of what an exclusive or shared fence mean
>  	 * is defined by the user, for reservation objects shared across drivers
> @@ -413,7 +413,7 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
>  
>  void dma_resv_init(struct dma_resv *obj);
>  void dma_resv_fini(struct dma_resv *obj);
> -int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
> +int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences);
>  void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
>  			     struct dma_fence *fence);

I found a few things, but with those corrected (vc4 and v3d plus the ttm
question; the other stuff is just comments) this gets my

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 16/23] dma-buf: add enum dma_resv_usage v3
  2022-03-21 13:58 ` [PATCH 16/23] dma-buf: add enum dma_resv_usage v3 Christian König
@ 2022-03-29 15:24   ` Daniel Vetter
  2022-04-04  1:13   ` Bas Nieuwenhuizen
  1 sibling, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 15:24 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:49PM +0100, Christian König wrote:
> This change adds the dma_resv_usage enum and allows us to specify why a
> dma_resv object is queried for its containing fences.
> 
> Additional to that a dma_resv_usage_rw() helper function is added to aid
> retrieving the fences for a read or write userspace submission.
> 
> This is then deployed to the different query functions of the dma_resv
> object and all of their users. When the write paratermer was previously
> true we now use DMA_RESV_USAGE_WRITE and DMA_RESV_USAGE_READ otherwise.
> 
> v2: add KERNEL/OTHER in separate patch
> v3: some kerneldoc suggestions by Daniel
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

I think this patch here is the one which should (re)add the IMPORTANT:
section you removed in an earlier patch from the dma_buf.resv rules, and
instead make it point at enum dma_resv_usage.
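
For readers jumping in here: at this point of the series the enum only
has the two implicit sync levels (KERNEL/OTHER follow in a separate
patch per the changelog). A sketch of the ordering the checks below rely
on:

enum dma_resv_usage {
        DMA_RESV_USAGE_WRITE,   /* only the implicit write (exclusive) fence */
        DMA_RESV_USAGE_READ,    /* write fence plus the shared (read) fences */
};
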
> ---
>  drivers/dma-buf/dma-buf.c                     |  3 +-
>  drivers/dma-buf/dma-resv.c                    | 33 +++++----
>  drivers/dma-buf/st-dma-resv.c                 | 48 ++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c       |  5 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c      |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c       |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  7 +-
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  3 +-
>  drivers/gpu/drm/drm_gem.c                     |  6 +-
>  drivers/gpu/drm/drm_gem_atomic_helper.c       |  2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem.c         |  6 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  7 +-
>  .../gpu/drm/i915/display/intel_atomic_plane.c |  3 +-
>  drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  4 +-
>  drivers/gpu/drm/i915/gem/i915_gem_lmem.c      |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_wait.c      |  6 +-
>  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  3 +-
>  drivers/gpu/drm/i915/i915_request.c           |  3 +-
>  drivers/gpu/drm/i915/i915_sw_fence.c          |  2 +-
>  drivers/gpu/drm/msm/msm_gem.c                 |  3 +-
>  drivers/gpu/drm/nouveau/dispnv50/wndw.c       |  3 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c          |  8 +--
>  drivers/gpu/drm/nouveau/nouveau_fence.c       |  8 ++-
>  drivers/gpu/drm/nouveau/nouveau_gem.c         |  3 +-
>  drivers/gpu/drm/panfrost/panfrost_drv.c       |  3 +-
>  drivers/gpu/drm/qxl/qxl_debugfs.c             |  3 +-
>  drivers/gpu/drm/radeon/radeon_display.c       |  3 +-
>  drivers/gpu/drm/radeon/radeon_gem.c           |  9 ++-
>  drivers/gpu/drm/radeon/radeon_mn.c            |  4 +-
>  drivers/gpu/drm/radeon/radeon_sync.c          |  2 +-
>  drivers/gpu/drm/radeon/radeon_uvd.c           |  4 +-
>  drivers/gpu/drm/scheduler/sched_main.c        |  3 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                  | 18 ++---
>  drivers/gpu/drm/vgem/vgem_fence.c             |  4 +-
>  drivers/gpu/drm/virtio/virtgpu_ioctl.c        |  5 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |  4 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c      |  4 +-
>  drivers/infiniband/core/umem_dmabuf.c         |  3 +-
>  include/linux/dma-resv.h                      | 69 +++++++++++++++----
>  46 files changed, 208 insertions(+), 127 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 602b12d7470d..528983d3ba64 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -1124,7 +1124,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
>  	long ret;
>  
>  	/* Wait on any implicit rendering fences */
> -	ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT);
> +	ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write),
> +				    true, MAX_SCHEDULE_TIMEOUT);
>  	if (ret < 0)
>  		return ret;
>  
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 78a32da2cb0b..bb7b023c2d33 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -384,7 +384,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
>  	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
>  	cursor->index = -1;
>  	cursor->shared_count = 0;
> -	if (cursor->all_fences) {
> +	if (cursor->usage >= DMA_RESV_USAGE_READ) {

Bit of a bikeshed, but I think this would read more clearly as
dma_resv_usage_rw(true), exactly because of the confusion you explain in
the comment in that function.

If this (and all the others like this) don't change in the next patches
I'd change them, but up to you really.
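
For reference, the helper in question is roughly the following
(reconstructed from the commit message, the in-tree comment is longer):

static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
{
        /* A new write has to wait for all existing reads and writes, so
         * it queries with READ; a new read only has to wait for existing
         * writes, hence the seemingly inverted mapping.
         */
        return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE;
}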

>  		cursor->fences = dma_resv_shared_list(cursor->obj);
>  		if (cursor->fences)
>  			cursor->shared_count = cursor->fences->shared_count;
> @@ -496,7 +496,7 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
>  	dma_resv_assert_held(cursor->obj);
>  
>  	cursor->index = 0;
> -	if (cursor->all_fences)
> +	if (cursor->usage >= DMA_RESV_USAGE_READ)
>  		cursor->fences = dma_resv_shared_list(cursor->obj);
>  	else
>  		cursor->fences = NULL;
> @@ -551,7 +551,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>  	list = NULL;
>  	excl = NULL;
>  
> -	dma_resv_iter_begin(&cursor, src, true);
> +	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
>  	dma_resv_for_each_fence_unlocked(&cursor, f) {
>  
>  		if (dma_resv_iter_is_restarted(&cursor)) {
> @@ -597,7 +597,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
>   * dma_resv_get_fences - Get an object's shared and exclusive
>   * fences without update side lock held
>   * @obj: the reservation object
> - * @write: true if we should return all fences
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @num_fences: the number of fences returned
>   * @fences: the array of fence ptrs returned (array is krealloc'd to the
>   * required size, and must be freed by caller)
> @@ -605,7 +605,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
>   * Retrieve all fences from the reservation object.
>   * Returns either zero or -ENOMEM.
>   */
> -int dma_resv_get_fences(struct dma_resv *obj, bool write,
> +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>  			unsigned int *num_fences, struct dma_fence ***fences)
>  {
>  	struct dma_resv_iter cursor;
> @@ -614,7 +614,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write,
>  	*num_fences = 0;
>  	*fences = NULL;
>  
> -	dma_resv_iter_begin(&cursor, obj, write);
> +	dma_resv_iter_begin(&cursor, obj, usage);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  
>  		if (dma_resv_iter_is_restarted(&cursor)) {
> @@ -646,7 +646,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences);
>  /**
>   * dma_resv_get_singleton - Get a single fence for all the fences
>   * @obj: the reservation object
> - * @write: true if we should return all fences
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @fence: the resulting fence
>   *
>   * Get a single fence representing all the fences inside the resv object.
> @@ -656,7 +656,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences);
>   * object since that can lead to stack corruption when finalizing the
>   * dma_fence_array.
>   */
> -int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
>  			   struct dma_fence **fence)
>  {
>  	struct dma_fence_array *array;
> @@ -664,7 +664,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, bool write,
>  	unsigned count;
>  	int r;
>  
> -	r = dma_resv_get_fences(obj, write, &count, &fences);
> +	r = dma_resv_get_fences(obj, usage, &count, &fences);
>          if (r)
>  		return r;
>  
> @@ -698,7 +698,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
>   * dma_resv_wait_timeout - Wait on reservation's objects
>   * shared and/or exclusive fences.
>   * @obj: the reservation object
> - * @wait_all: if true, wait on all fences, else wait on just exclusive fence
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @intr: if true, do interruptible wait
>   * @timeout: timeout value in jiffies or zero to return immediately
>   *
> @@ -708,14 +708,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
>   * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or
>   * greater than zer on success.
>   */
> -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
> -			   unsigned long timeout)
> +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
> +			   bool intr, unsigned long timeout)
>  {
>  	long ret = timeout ? timeout : 1;
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_iter_begin(&cursor, obj, wait_all);
> +	dma_resv_iter_begin(&cursor, obj, usage);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  
>  		ret = dma_fence_wait_timeout(fence, intr, ret);
> @@ -735,8 +735,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
>   * dma_resv_test_signaled - Test if a reservation object's fences have been
>   * signaled.
>   * @obj: the reservation object
> - * @test_all: if true, test all fences, otherwise only test the exclusive
> - * fence
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   *
>   * Callers are not required to hold specific locks, but maybe hold
>   * dma_resv_lock() already.
> @@ -745,12 +744,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
>   *
>   * True if all fences signaled, else false.
>   */
> -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
> +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage)
>  {
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_iter_begin(&cursor, obj, test_all);
> +	dma_resv_iter_begin(&cursor, obj, usage);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  		dma_resv_iter_end(&cursor);
>  		return false;
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index d2e61f6ae989..d097981061b1 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -58,7 +58,7 @@ static int sanitycheck(void *arg)
>  	return r;
>  }
>  
> -static int test_signaling(void *arg, bool shared)
> +static int test_signaling(void *arg, enum dma_resv_usage usage)
>  {
>  	struct dma_resv resv;
>  	struct dma_fence *f;
> @@ -81,18 +81,18 @@ static int test_signaling(void *arg, bool shared)
>  		goto err_unlock;
>  	}
>  
> -	if (shared)
> +	if (usage >= DMA_RESV_USAGE_READ)
>  		dma_resv_add_shared_fence(&resv, f);
>  	else
>  		dma_resv_add_excl_fence(&resv, f);
>  
> -	if (dma_resv_test_signaled(&resv, shared)) {
> +	if (dma_resv_test_signaled(&resv, usage)) {
>  		pr_err("Resv unexpectedly signaled\n");
>  		r = -EINVAL;
>  		goto err_unlock;
>  	}
>  	dma_fence_signal(f);
> -	if (!dma_resv_test_signaled(&resv, shared)) {
> +	if (!dma_resv_test_signaled(&resv, usage)) {
>  		pr_err("Resv not reporting signaled\n");
>  		r = -EINVAL;
>  		goto err_unlock;
> @@ -107,15 +107,15 @@ static int test_signaling(void *arg, bool shared)
>  
>  static int test_excl_signaling(void *arg)
>  {
> -	return test_signaling(arg, false);
> +	return test_signaling(arg, DMA_RESV_USAGE_WRITE);
>  }
>  
>  static int test_shared_signaling(void *arg)
>  {
> -	return test_signaling(arg, true);
> +	return test_signaling(arg, DMA_RESV_USAGE_READ);
>  }
>  
> -static int test_for_each(void *arg, bool shared)
> +static int test_for_each(void *arg, enum dma_resv_usage usage)
>  {
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *f, *fence;
> @@ -139,13 +139,13 @@ static int test_for_each(void *arg, bool shared)
>  		goto err_unlock;
>  	}
>  
> -	if (shared)
> +	if (usage >= DMA_RESV_USAGE_READ)
>  		dma_resv_add_shared_fence(&resv, f);
>  	else
>  		dma_resv_add_excl_fence(&resv, f);
>  
>  	r = -ENOENT;
> -	dma_resv_for_each_fence(&cursor, &resv, shared, fence) {
> +	dma_resv_for_each_fence(&cursor, &resv, usage, fence) {
>  		if (!r) {
>  			pr_err("More than one fence found\n");
>  			r = -EINVAL;
> @@ -156,7 +156,8 @@ static int test_for_each(void *arg, bool shared)
>  			r = -EINVAL;
>  			goto err_unlock;
>  		}
> -		if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
> +		if (dma_resv_iter_is_exclusive(&cursor) !=
> +		    (usage >= DMA_RESV_USAGE_READ)) {
>  			pr_err("Unexpected fence usage\n");
>  			r = -EINVAL;
>  			goto err_unlock;
> @@ -178,15 +179,15 @@ static int test_for_each(void *arg, bool shared)
>  
>  static int test_excl_for_each(void *arg)
>  {
> -	return test_for_each(arg, false);
> +	return test_for_each(arg, DMA_RESV_USAGE_WRITE);
>  }
>  
>  static int test_shared_for_each(void *arg)
>  {
> -	return test_for_each(arg, true);
> +	return test_for_each(arg, DMA_RESV_USAGE_READ);
>  }
>  
> -static int test_for_each_unlocked(void *arg, bool shared)
> +static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>  {
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *f, *fence;
> @@ -211,14 +212,14 @@ static int test_for_each_unlocked(void *arg, bool shared)
>  		goto err_free;
>  	}
>  
> -	if (shared)
> +	if (usage >= DMA_RESV_USAGE_READ)
>  		dma_resv_add_shared_fence(&resv, f);
>  	else
>  		dma_resv_add_excl_fence(&resv, f);
>  	dma_resv_unlock(&resv);
>  
>  	r = -ENOENT;
> -	dma_resv_iter_begin(&cursor, &resv, shared);
> +	dma_resv_iter_begin(&cursor, &resv, usage);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  		if (!r) {
>  			pr_err("More than one fence found\n");
> @@ -234,7 +235,8 @@ static int test_for_each_unlocked(void *arg, bool shared)
>  			r = -EINVAL;
>  			goto err_iter_end;
>  		}
> -		if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
> +		if (dma_resv_iter_is_exclusive(&cursor) !=
> +		    (usage >= DMA_RESV_USAGE_READ)) {
>  			pr_err("Unexpected fence usage\n");
>  			r = -EINVAL;
>  			goto err_iter_end;
> @@ -262,15 +264,15 @@ static int test_for_each_unlocked(void *arg, bool shared)
>  
>  static int test_excl_for_each_unlocked(void *arg)
>  {
> -	return test_for_each_unlocked(arg, false);
> +	return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
>  }
>  
>  static int test_shared_for_each_unlocked(void *arg)
>  {
> -	return test_for_each_unlocked(arg, true);
> +	return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
>  }
>  
> -static int test_get_fences(void *arg, bool shared)
> +static int test_get_fences(void *arg, enum dma_resv_usage usage)
>  {
>  	struct dma_fence *f, **fences = NULL;
>  	struct dma_resv resv;
> @@ -294,13 +296,13 @@ static int test_get_fences(void *arg, bool shared)
>  		goto err_resv;
>  	}
>  
> -	if (shared)
> +	if (usage >= DMA_RESV_USAGE_READ)
>  		dma_resv_add_shared_fence(&resv, f);
>  	else
>  		dma_resv_add_excl_fence(&resv, f);
>  	dma_resv_unlock(&resv);
>  
> -	r = dma_resv_get_fences(&resv, shared, &i, &fences);
> +	r = dma_resv_get_fences(&resv, usage, &i, &fences);
>  	if (r) {
>  		pr_err("get_fences failed\n");
>  		goto err_free;
> @@ -324,12 +326,12 @@ static int test_get_fences(void *arg, bool shared)
>  
>  static int test_excl_get_fences(void *arg)
>  {
> -	return test_get_fences(arg, false);
> +	return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
>  }
>  
>  static int test_shared_get_fences(void *arg)
>  {
> -	return test_get_fences(arg, true);
> +	return test_get_fences(arg, DMA_RESV_USAGE_READ);
>  }
>  
>  int dma_resv(void)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 02488a824fe8..2bf909a4242a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1278,7 +1278,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  		 * submission in a dma_fence_chain and add it as exclusive
>  		 * fence.
>  		 */
> -		dma_resv_for_each_fence(&cursor, resv, false, fence) {
> +		dma_resv_for_each_fence(&cursor, resv,
> +					DMA_RESV_USAGE_WRITE,
> +					fence) {
>  			break;
>  		}
>  		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index e76b96d55551..cefa404d7842 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
>  		goto unpin;
>  	}
>  
> -	/* TODO: Unify this with other drivers */
> -	r = dma_resv_get_fences(new_abo->tbo.base.resv, true,
> +	r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
>  				&work->shared_count,
>  				&work->shared);
>  	if (unlikely(r != 0)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9b12cab5e606..3a5fe05c7a7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
>  		return -ENOENT;
>  	}
>  	robj = gem_to_amdgpu_bo(gobj);
> -	ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout);
> +	ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
> +				    true, timeout);
>  
>  	/* ret == 0 means not signaled,
>  	 * ret > 0 means signaled
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 888d97143177..490d2a7a3e2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  	struct dma_fence *fence;
>  	int r;
>  
> -	r = dma_resv_get_singleton(resv, true, &fence);
> +	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence);
>  	if (r)
>  		goto fallback;
>  
> @@ -139,7 +139,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  	/* Not enough memory for the delayed delete, as last resort
>  	 * block for all the fences to complete.
>  	 */
> -	dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT);
> +	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
> +			      false, MAX_SCHEDULE_TIMEOUT);
>  	amdgpu_pasid_free(pasid);
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 4b153daf283d..86f5248676b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
>  
>  	mmu_interval_set_seq(mni, cur_seq);
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> -				  MAX_SCHEDULE_TIMEOUT);
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
> +				  false, MAX_SCHEDULE_TIMEOUT);
>  	mutex_unlock(&adev->notifier_lock);
>  	if (r <= 0)
>  		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 2f808decd8d9..0a843cc54945 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -765,8 +765,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
>  		return 0;
>  	}
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
> -				  MAX_SCHEDULE_TIMEOUT);
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +				  false, MAX_SCHEDULE_TIMEOUT);
>  	if (r < 0)
>  		return r;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 40e06745fae9..744e144e5fc2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>  	if (resv == NULL)
>  		return -EINVAL;
>  
> -	dma_resv_for_each_fence(&cursor, resv, true, f) {
> +	/* TODO: Use DMA_RESV_USAGE_READ here */
> +	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
>  		dma_fence_chain_for_each(f, f) {
>  			struct dma_fence *tmp = dma_fence_chain_contained(f);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 5859ed0552a4..9ffd8c4c34a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1356,7 +1356,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>  	 * If true, then return false as any KFD process needs all its BOs to
>  	 * be resident to run successfully
>  	 */
> -	dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
> +	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
> +				DMA_RESV_USAGE_READ, f) {
>  		if (amdkfd_fence_check_mm(f, current->mm))
>  			return false;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 6f8de11a17f1..33deb0df62fd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -1162,7 +1162,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>  	ib->length_dw = 16;
>  
>  	if (direct) {
> -		r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> +		r = dma_resv_wait_timeout(bo->tbo.base.resv,
> +					  DMA_RESV_USAGE_WRITE, false,
>  					  msecs_to_jiffies(10));
>  		if (r == 0)
>  			r = -ETIMEDOUT;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0ab85280e8ed..f3235aad7282 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2105,7 +2105,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_for_each_fence(&cursor, resv, true, fence) {
> +	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
>  		/* Add a callback for each fence in the reservation object */
>  		amdgpu_vm_prt_get(adev);
>  		amdgpu_vm_add_prt_cb(adev, fence);
> @@ -2707,7 +2707,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>  		return true;
>  
>  	/* Don't evict VM page tables while they are busy */
> -	if (!dma_resv_test_signaled(bo->tbo.base.resv, true))
> +	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ))
>  		return false;
>  
>  	/* Try to block ongoing updates */
> @@ -2887,7 +2887,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>   */
>  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>  {
> -	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true,
> +	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
> +					DMA_RESV_USAGE_READ,
>  					true, timeout);
>  	if (timeout <= 0)
>  		return timeout;
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 0eeb394e949c..c9532642559c 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -9199,7 +9199,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>  		 * deadlock during GPU reset when this fence will not signal
>  		 * but we hold reservation lock for the BO.
>  		 */
> -		r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false,
> +		r = dma_resv_wait_timeout(abo->tbo.base.resv,
> +					  DMA_RESV_USAGE_WRITE, false,
>  					  msecs_to_jiffies(5000));
>  		if (unlikely(r <= 0))
>  			DRM_ERROR("Waiting for fences timed out!");
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index 3c888db59ea4..54079d762051 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -771,7 +771,8 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle,
>  		return -EINVAL;
>  	}
>  
> -	ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout);
> +	ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all),
> +				    true, timeout);
>  	if (ret == 0)
>  		ret = -ETIME;
>  	else if (ret > 0)
> @@ -1345,7 +1346,8 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
>  	struct dma_fence *fence;
>  	int ret = 0;
>  
> -	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
> +	dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
> +				fence) {
>  		ret = drm_gem_fence_array_add(fence_array, fence);
>  		if (ret)
>  			break;
> diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
> index 9338ddb7edff..a6d89aed0bda 100644
> --- a/drivers/gpu/drm/drm_gem_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
> @@ -151,7 +151,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
>  		return 0;
>  
>  	obj = drm_gem_fb_get_obj(state->fb, 0);
> -	ret = dma_resv_get_singleton(obj->resv, false, &fence);
> +	ret = dma_resv_get_singleton(obj->resv, DMA_RESV_USAGE_WRITE, &fence);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> index d5314aa28ff7..507172e2780b 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> @@ -380,12 +380,14 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
>  	}
>  
>  	if (op & ETNA_PREP_NOSYNC) {
> -		if (!dma_resv_test_signaled(obj->resv, write))
> +		if (!dma_resv_test_signaled(obj->resv,
> +					    dma_resv_usage_rw(write)))
>  			return -EBUSY;
>  	} else {
>  		unsigned long remain = etnaviv_timeout_to_jiffies(timeout);
>  
> -		ret = dma_resv_wait_timeout(obj->resv, write, true, remain);
> +		ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
> +					    true, remain);
>  		if (ret <= 0)
>  			return ret == 0 ? -ETIMEDOUT : ret;
>  	}
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index b808ddb9da48..d7cd26dfaf8a 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -178,6 +178,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>  	for (i = 0; i < submit->nr_bos; i++) {
>  		struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
>  		struct dma_resv *robj = bo->obj->base.resv;
> +		enum dma_resv_usage usage;
>  
>  		ret = dma_resv_reserve_fences(robj, 1);
>  		if (ret)
> @@ -186,9 +187,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>  		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
>  			continue;
>  
> -		ret = dma_resv_get_fences(robj,
> -					  bo->flags & ETNA_SUBMIT_BO_WRITE,
> -					  &bo->nr_shared, &bo->shared);
> +		usage = dma_resv_usage_rw(bo->flags & ETNA_SUBMIT_BO_WRITE);
> +		ret = dma_resv_get_fences(robj, usage, &bo->nr_shared,
> +					  &bo->shared);
>  		if (ret)
>  			return ret;
>  	}
> diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
> index c2c512cd8ec0..2c3bb8aecd07 100644
> --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
> @@ -799,7 +799,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
>  		if (ret < 0)
>  			goto unpin_fb;
>  
> -		dma_resv_iter_begin(&cursor, obj->base.resv, false);
> +		dma_resv_iter_begin(&cursor, obj->base.resv,
> +				    DMA_RESV_USAGE_WRITE);
>  		dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  			add_rps_boost_after_vblank(new_plane_state->hw.crtc,
>  						   fence);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> index 470fdfd61a0f..14a1c0ad8c3c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> @@ -138,12 +138,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	 * Alternatively, we can trade that extra information on read/write
>  	 * activity with
>  	 *	args->busy =
> -	 *		!dma_resv_test_signaled(obj->resv, true);
> +	 *		!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
>  	 * to report the overall busyness. This is what the wait-ioctl does.
>  	 *
>  	 */
>  	args->busy = 0;
> -	dma_resv_iter_begin(&cursor, obj->base.resv, true);
> +	dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  		if (dma_resv_iter_is_restarted(&cursor))
>  			args->busy = 0;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> index 444f8268b9c5..a200d3e66573 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
>  	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
>  
>  #ifdef CONFIG_LOCKDEP
> -	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) &&
> +	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) &&
>  		    i915_gem_object_evictable(obj));
>  #endif
>  	return mr && (mr->type == INTEL_MEMORY_LOCAL ||
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> index 3cc01c30dd62..60feff9160de 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> @@ -85,7 +85,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
>  		return true;
>  
>  	/* we will unbind on next submission, still have userptr pins */
> -	r = dma_resv_wait_timeout(obj->base.resv, true, false,
> +	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false,
>  				  MAX_SCHEDULE_TIMEOUT);
>  	if (r <= 0)
>  		drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> index dab3d30c09a0..319936f91ac5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> @@ -40,7 +40,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
>  	struct dma_fence *fence;
>  	long ret = timeout ?: 1;
>  
> -	dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
> +	dma_resv_iter_begin(&cursor, resv,
> +			    dma_resv_usage_rw(flags & I915_WAIT_ALL));
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  		ret = i915_gem_object_wait_fence(fence, flags, timeout);
>  		if (ret <= 0)
> @@ -117,7 +118,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL);
> +	dma_resv_iter_begin(&cursor, obj->base.resv,
> +			    dma_resv_usage_rw(flags & I915_WAIT_ALL));
>  	dma_resv_for_each_fence_unlocked(&cursor, fence)
>  		i915_gem_fence_wait_priority(fence, attr);
>  	dma_resv_iter_end(&cursor);
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> index 3cc74b0fed06..342df658e0fc 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> @@ -219,7 +219,8 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
>  		goto out_detach;
>  	}
>  
> -	timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ);
> +	timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE,
> +					true, 5 * HZ);
>  	if (!timeout) {
>  		pr_err("dmabuf wait for exclusive fence timed out.\n");
>  		timeout = -ETIME;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 76cf5ac91e94..17d7216ce221 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1594,7 +1594,8 @@ i915_request_await_object(struct i915_request *to,
>  	struct dma_fence *fence;
>  	int ret = 0;
>  
> -	dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
> +	dma_resv_for_each_fence(&cursor, obj->base.resv,
> +				dma_resv_usage_rw(write), fence) {
>  		ret = i915_request_await_dma_fence(to, fence);
>  		if (ret)
>  			break;
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 2a74a9a1cafe..ae984c66c48a 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -585,7 +585,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>  	debug_fence_assert(fence);
>  	might_sleep_if(gfpflags_allow_blocking(gfp));
>  
> -	dma_resv_iter_begin(&cursor, resv, write);
> +	dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write));
>  	dma_resv_for_each_fence_unlocked(&cursor, f) {
>  		pending = i915_sw_fence_await_dma_fence(fence, f, timeout,
>  							gfp);
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index 02b9ae65a96a..01bbb5f2d462 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -848,7 +848,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout)
>  		op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
>  	long ret;
>  
> -	ret = dma_resv_wait_timeout(obj->resv, write, true,  remain);
> +	ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
> +				    true,  remain);
>  	if (ret == 0)
>  		return remain == 0 ? -EBUSY : -ETIMEDOUT;
>  	else if (ret < 0)
> diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> index e2faf92e4831..8642b84ea20c 100644
> --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> @@ -558,7 +558,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
>  			asyw->image.handle[0] = ctxdma->object.handle;
>  	}
>  
> -	ret = dma_resv_get_singleton(nvbo->bo.base.resv, false,
> +	ret = dma_resv_get_singleton(nvbo->bo.base.resv,
> +				     DMA_RESV_USAGE_WRITE,
>  				     &asyw->state.fence);
>  	if (ret)
>  		return ret;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 74f8652d2bd3..c6bb4dbcd735 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -962,11 +962,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
>  	struct dma_fence *fence;
>  	int ret;
>  
> -	/* TODO: This is actually a memory management dependency */
> -	ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
> +	ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE,
> +				     &fence);
>  	if (ret)
> -		dma_resv_wait_timeout(bo->base.resv, false, false,
> -				      MAX_SCHEDULE_TIMEOUT);
> +		dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE,
> +				      false, MAX_SCHEDULE_TIMEOUT);
>  
>  	nv10_bo_put_tile_region(dev, *old_tile, fence);
>  	*old_tile = new_tile;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 0268259e97eb..d5e81ccee01c 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -350,14 +350,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
>  	if (ret)
>  		return ret;
>  
> -	/* Waiting for the exclusive fence first causes performance regressions
> -	 * under some circumstances. So manually wait for the shared ones first.
> +	/* Waiting for the writes first causes performance regressions
> +	 * under some circumstances. So manually wait for the reads first.
>  	 */
>  	for (i = 0; i < 2; ++i) {
>  		struct dma_resv_iter cursor;
>  		struct dma_fence *fence;
>  
> -		dma_resv_for_each_fence(&cursor, resv, exclusive, fence) {
> +		dma_resv_for_each_fence(&cursor, resv,
> +					dma_resv_usage_rw(exclusive),
> +					fence) {
>  			struct nouveau_fence *f;
>  
>  			if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index 9416bee92141..fab542a758ff 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -962,7 +962,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>  		return -ENOENT;
>  	nvbo = nouveau_gem_object(gem);
>  
> -	lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true,
> +	lret = dma_resv_wait_timeout(nvbo->bo.base.resv,
> +				     dma_resv_usage_rw(write), true,
>  				     no_wait ? 0 : 30 * HZ);
>  	if (!lret)
>  		ret = -EBUSY;
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 94b6f0a19c83..7fcbc2a5b6cd 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -316,7 +316,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
>  	if (!gem_obj)
>  		return -ENOENT;
>  
> -	ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout);
> +	ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ,
> +				    true, timeout);
>  	if (!ret)
>  		ret = timeout ? -ETIMEDOUT : -EBUSY;
>  
> diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
> index 6a36b0fd845c..33e5889d6608 100644
> --- a/drivers/gpu/drm/qxl/qxl_debugfs.c
> +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
> @@ -61,7 +61,8 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
>  		struct dma_fence *fence;
>  		int rel = 0;
>  
> -		dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true);
> +		dma_resv_iter_begin(&cursor, bo->tbo.base.resv,
> +				    DMA_RESV_USAGE_READ);
>  		dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  			if (dma_resv_iter_is_restarted(&cursor))
>  				rel = 0;
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index f60e826cd292..57ff2b723c87 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -533,7 +533,8 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
>  		DRM_ERROR("failed to pin new rbo buffer before flip\n");
>  		goto cleanup;
>  	}
> -	r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence);
> +	r = dma_resv_get_singleton(new_rbo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +				   &work->fence);
>  	if (r) {
>  		radeon_bo_unreserve(new_rbo);
>  		DRM_ERROR("failed to get new rbo buffer fences\n");
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index a36a4f2c76b0..71bf9299e45c 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -161,7 +161,9 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
>  	}
>  	if (domain == RADEON_GEM_DOMAIN_CPU) {
>  		/* Asking for cpu access wait for object idle */
> -		r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ);
> +		r = dma_resv_wait_timeout(robj->tbo.base.resv,
> +					  DMA_RESV_USAGE_READ,
> +					  true, 30 * HZ);
>  		if (!r)
>  			r = -EBUSY;
>  
> @@ -523,7 +525,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	}
>  	robj = gem_to_radeon_bo(gobj);
>  
> -	r = dma_resv_test_signaled(robj->tbo.base.resv, true);
> +	r = dma_resv_test_signaled(robj->tbo.base.resv, DMA_RESV_USAGE_READ);
>  	if (r == 0)
>  		r = -EBUSY;
>  	else
> @@ -552,7 +554,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
>  	}
>  	robj = gem_to_radeon_bo(gobj);
>  
> -	ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ);
> +	ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
> +				    true, 30 * HZ);
>  	if (ret == 0)
>  		r = -EBUSY;
>  	else if (ret < 0)
> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
> index 9fa88549c89e..68ebeb1bdfff 100644
> --- a/drivers/gpu/drm/radeon/radeon_mn.c
> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
> @@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
>  		return true;
>  	}
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> -				  MAX_SCHEDULE_TIMEOUT);
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
> +				  false, MAX_SCHEDULE_TIMEOUT);
>  	if (r <= 0)
>  		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>  
> diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
> index b991ba1bcd51..49bbb2266c0f 100644
> --- a/drivers/gpu/drm/radeon/radeon_sync.c
> +++ b/drivers/gpu/drm/radeon/radeon_sync.c
> @@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
>  	struct dma_fence *f;
>  	int r = 0;
>  
> -	dma_resv_for_each_fence(&cursor, resv, shared, f) {
> +	dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) {
>  		fence = to_radeon_fence(f);
>  		if (fence && fence->rdev == rdev)
>  			radeon_sync_fence(sync, fence);
> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
> index 377f9cdb5b53..4000ad2f39ba 100644
> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
> @@ -478,8 +478,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>  		return -EINVAL;
>  	}
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
> -				  MAX_SCHEDULE_TIMEOUT);
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +				  false, MAX_SCHEDULE_TIMEOUT);
>  	if (r <= 0) {
>  		DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
>  		return r ? r : -ETIME;
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index b81fceb0b8a2..0a1377dac58d 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -703,7 +703,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
>  	struct dma_fence *fence;
>  	int ret;
>  
> -	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
> +	dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
> +				fence) {
>  		/* Make sure to grab an additional ref on the added fence */
>  		dma_fence_get(fence);
>  		ret = drm_sched_job_add_dependency(job, fence);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 1dd6f13bb03c..d4b2695606e2 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -272,7 +272,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_iter_begin(&cursor, resv, true);
> +	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  		if (!fence->ops->signaled)
>  			dma_fence_enable_sw_signaling(fence);
> @@ -301,7 +301,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  	struct dma_resv *resv = &bo->base._resv;
>  	int ret;
>  
> -	if (dma_resv_test_signaled(resv, true))
> +	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ))
>  		ret = 0;
>  	else
>  		ret = -EBUSY;
> @@ -313,7 +313,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  			dma_resv_unlock(bo->base.resv);
>  		spin_unlock(&bo->bdev->lru_lock);
>  
> -		lret = dma_resv_wait_timeout(resv, true, interruptible,
> +		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
> +					     interruptible,
>  					     30 * HZ);
>  
>  		if (lret < 0)
> @@ -416,7 +417,8 @@ static void ttm_bo_release(struct kref *kref)
>  			/* Last resort, if we fail to allocate memory for the
>  			 * fences block for the BO to become idle
>  			 */
> -			dma_resv_wait_timeout(bo->base.resv, true, false,
> +			dma_resv_wait_timeout(bo->base.resv,
> +					      DMA_RESV_USAGE_READ, false,
>  					      30 * HZ);
>  		}
>  
> @@ -427,7 +429,7 @@ static void ttm_bo_release(struct kref *kref)
>  		ttm_mem_io_free(bdev, bo->resource);
>  	}
>  
> -	if (!dma_resv_test_signaled(bo->base.resv, true) ||
> +	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) ||
>  	    !dma_resv_trylock(bo->base.resv)) {
>  		/* The BO is not idle, resurrect it for delayed destroy */
>  		ttm_bo_flush_all_fences(bo);
> @@ -1072,14 +1074,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>  	long timeout = 15 * HZ;
>  
>  	if (no_wait) {
> -		if (dma_resv_test_signaled(bo->base.resv, true))
> +		if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ))
>  			return 0;
>  		else
>  			return -EBUSY;
>  	}
>  
> -	timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible,
> -					timeout);
> +	timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
> +					interruptible, timeout);
>  	if (timeout < 0)
>  		return timeout;
>  
> diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
> index 2ddbebca87d9..91fc4940c65a 100644
> --- a/drivers/gpu/drm/vgem/vgem_fence.c
> +++ b/drivers/gpu/drm/vgem/vgem_fence.c
> @@ -130,6 +130,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>  	struct vgem_file *vfile = file->driver_priv;
>  	struct dma_resv *resv;
>  	struct drm_gem_object *obj;
> +	enum dma_resv_usage usage;
>  	struct dma_fence *fence;
>  	int ret;
>  
> @@ -151,7 +152,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>  
>  	/* Check for a conflicting fence */
>  	resv = obj->resv;
> -	if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) {
> +	usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE);
> +	if (!dma_resv_test_signaled(resv, usage)) {
>  		ret = -EBUSY;
>  		goto err_fence;
>  	}
> diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> index c708bab555c6..5577cc7408b2 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> @@ -518,9 +518,10 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data,
>  		return -ENOENT;
>  
>  	if (args->flags & VIRTGPU_WAIT_NOWAIT) {
> -		ret = dma_resv_test_signaled(obj->resv, true);
> +		ret = dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
>  	} else {
> -		ret = dma_resv_wait_timeout(obj->resv, true, true, timeout);
> +		ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ,
> +					    true, timeout);
>  	}
>  	if (ret == 0)
>  		ret = -EBUSY;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index fe13aa8b4a64..b96884f7d03d 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -528,8 +528,8 @@ static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo,
>  	if (flags & drm_vmw_synccpu_allow_cs) {
>  		long lret;
>  
> -		lret = dma_resv_wait_timeout(bo->base.resv, true, true,
> -					     nonblock ? 0 :
> +		lret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
> +					     true, nonblock ? 0 :
>  					     MAX_SCHEDULE_TIMEOUT);
>  		if (!lret)
>  			return -EBUSY;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index 36c3b5db7e69..39081dbf9ac8 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -1166,8 +1166,8 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
>  		if (bo->moving)
>  			dma_fence_put(bo->moving);
>  
> -		/* TODO: This is actually a memory management dependency */
> -		return dma_resv_get_singleton(bo->base.resv, false,
> +		return dma_resv_get_singleton(bo->base.resv,
> +					      DMA_RESV_USAGE_WRITE,
>  					      &bo->moving);
>  	}
>  
> diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
> index d32cd7538835..f9901d273b8e 100644
> --- a/drivers/infiniband/core/umem_dmabuf.c
> +++ b/drivers/infiniband/core/umem_dmabuf.c
> @@ -67,7 +67,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
>  	 * may be not up-to-date. Wait for the exporter to finish
>  	 * the migration.
>  	 */
> -	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false,
> +	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
> +				     DMA_RESV_USAGE_WRITE,
>  				     false, MAX_SCHEDULE_TIMEOUT);
>  }
>  EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 5fa04d0fccad..658674c4b7b9 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -49,6 +49,49 @@ extern struct ww_class reservation_ww_class;
>  
>  struct dma_resv_list;
>  
> +/**
> + * enum dma_resv_usage - how the fences from a dma_resv obj are used
> + *
> + * This enum describes the different use cases for a dma_resv object and
> + * controls which fences are returned when queried.

You need to add here that usage levels are sorted, and in many cases a
later enum implies all previous ones:

"Note that usage levels are sorted and in many cases a later enum value
implies all previous ones, e.g. when iterating fences of a struct dma_resv
using dma_resv_for_each_fence()."
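
Maybe even with a tiny, purely illustrative snippet to make the ordering
concrete, something along these lines:

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	/* resv is a struct dma_resv *, dma_resv_lock() held */

	/* READ is the later enum value, so this also returns the WRITE fences */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence)
		handle_fence(fence);

	/* whereas asking for WRITE only returns the implicit write fences */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_WRITE, fence)
		handle_fence(fence);

(handle_fence() is just a stand-in, of course.)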

> + */
> +enum dma_resv_usage {
> +	/**
> +	 * @DMA_RESV_USAGE_WRITE: Implicit write synchronization.
> +	 *
> +	 * This should only be used for userspace command submissions which add
> +	 * an implicit write dependency.
> +	 */
> +	DMA_RESV_USAGE_WRITE,
> +
> +	/**
> +	 * @DMA_RESV_USAGE_READ: Implicit read synchronization.
> +	 *
> +	 * This should only be used for userspace command submissions which add
> +	 * an implicit read dependency.
> +	 */
> +	DMA_RESV_USAGE_READ,
> +};
> +
> +/**
> + * dma_resv_usage_rw - helper for implicit sync
> + * @write: true if we create a new implicit sync write
> + *
> + * This returns the implicit synchronization usage for write or read accesses,
> + * see enum dma_resv_usage.
> + */

I think a link from the IMPLICIT SYNCHRONIZATION RULES in the dma_buf.resv
kerneldoc to this helper should be added. Also perhaps link from here back
to &dma_buf.resv for completeness.
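
Roughly (untested kerneldoc markup, just to illustrate what I mean):

 * This returns the implicit synchronization usage for write or read accesses,
 * see enum dma_resv_usage and &dma_buf.resv.

plus a short "see dma_resv_usage_rw()" sentence in the IMPLICIT
SYNCHRONIZATION RULES paragraph of the &dma_buf.resv documentation.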

> +static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
> +{
> +	/* This looks confusing at first sight, but is indeed correct.
> +	 *
> +	 * The rational is that new write operations needs to wait for the
> +	 * existing read and write operations to finish.
> +	 * But a new read operation only needs to wait for the existing write
> +	 * operations to finish.
> +	 */
> +	return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE;
> +}
> +
>  /**"
>   * struct dma_resv - a reservation object manages fences for a buffer
>   *
> @@ -142,8 +185,8 @@ struct dma_resv_iter {
>  	/** @obj: The dma_resv object we iterate over */
>  	struct dma_resv *obj;
>  
> -	/** @all_fences: If all fences should be returned */
> -	bool all_fences;
> +	/** @usage: Controls which fences are returned */

This should clarify that the iterator returns all fences with this usage or
lower, i.e. a later usage value also covers everything from the earlier ones.
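
E.g. something in the direction of (just a sketch):

	/** @usage: Return all fences with this usage or lower. */
	enum dma_resv_usage usage;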

> +	enum dma_resv_usage usage;
>  
>  	/** @fence: the currently handled fence */
>  	struct dma_fence *fence;
> @@ -173,14 +216,14 @@ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
>   * dma_resv_iter_begin - initialize a dma_resv_iter object
>   * @cursor: The dma_resv_iter object to initialize
>   * @obj: The dma_resv object which we want to iterate over
> - * @all_fences: If all fences should be returned or just the exclusive one
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   */
>  static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
>  				       struct dma_resv *obj,
> -				       bool all_fences)
> +				       enum dma_resv_usage usage)
>  {
>  	cursor->obj = obj;
> -	cursor->all_fences = all_fences;
> +	cursor->usage = usage;
>  	cursor->fence = NULL;
>  }
>  
> @@ -241,7 +284,7 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
>   * dma_resv_for_each_fence - fence iterator
>   * @cursor: a struct dma_resv_iter pointer
>   * @obj: a dma_resv object pointer
> - * @all_fences: true if all fences should be returned
> + * @usage: controls which fences to return
>   * @fence: the current fence
>   *
>   * Iterate over the fences in a struct dma_resv object while holding the
> @@ -250,8 +293,8 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
>   * valid as long as the lock is held and so no extra reference to the fence is
>   * taken.
>   */
> -#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
> -	for (dma_resv_iter_begin(cursor, obj, all_fences),	\
> +#define dma_resv_for_each_fence(cursor, obj, usage, fence)	\
> +	for (dma_resv_iter_begin(cursor, obj, usage),	\
>  	     fence = dma_resv_iter_first(cursor); fence;	\
>  	     fence = dma_resv_iter_next(cursor))
>  
> @@ -418,14 +461,14 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
>  			     struct dma_fence *fence);
>  void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
> -int dma_resv_get_fences(struct dma_resv *obj, bool write,
> +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>  			unsigned int *num_fences, struct dma_fence ***fences);
> -int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
>  			   struct dma_fence **fence);
>  int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
> -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
> -			   unsigned long timeout);
> -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all);
> +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
> +			   bool intr, unsigned long timeout);
> +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage);
>  void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
>  
>  #endif /* _LINUX_RESERVATION_H */

With the kerneldoc links and missing stuff filled out:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

I didn't carefully check whether the patch split is right, but I'm
assuming if it compiles it should be good in that regard :-)

Cheers, Daniel
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5
  2022-03-21 13:58 ` [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5 Christian König
@ 2022-03-29 15:43   ` Daniel Vetter
  2022-04-01 15:01     ` Christian König
  2022-04-02 22:16   ` Bas Nieuwenhuizen
  1 sibling, 1 reply; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 15:43 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:50PM +0100, Christian König wrote:
> Instead of distingting between shared and exclusive fences specify
> the fence usage while adding fences.
> 
> Rework all drivers to use this interface instead and deprecate the old one.
> 
> v2: some kerneldoc comments suggested by Daniel
> v3: fix a missing case in radeon
> v4: rebase on nouveau changes, fix lockdep and temporary disable warning
> v5: more documentation updates
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-resv.c                    | 345 ++++++++----------
>  drivers/dma-buf/st-dma-resv.c                 | 101 ++---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   6 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  10 +-
>  drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  13 +-
>  drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   3 +-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |   5 +-
>  drivers/gpu/drm/i915/i915_vma.c               |   6 +-
>  drivers/gpu/drm/lima/lima_gem.c               |   2 +-
>  drivers/gpu/drm/msm/msm_gem_submit.c          |   2 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c          |   9 +-
>  drivers/gpu/drm/nouveau/nouveau_fence.c       |   4 +-
>  drivers/gpu/drm/qxl/qxl_release.c             |   3 +-
>  drivers/gpu/drm/radeon/radeon_object.c        |   6 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                  |   2 +-
>  drivers/gpu/drm/ttm/ttm_bo_util.c             |   5 +-
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c        |   6 +-
>  drivers/gpu/drm/v3d/v3d_gem.c                 |   4 +-
>  drivers/gpu/drm/vc4/vc4_gem.c                 |   2 +-
>  drivers/gpu/drm/vgem/vgem_fence.c             |   9 +-
>  drivers/gpu/drm/virtio/virtgpu_gem.c          |   3 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |   3 +-
>  include/linux/dma-buf.h                       |  17 +-
>  include/linux/dma-resv.h                      |  72 ++--
>  26 files changed, 276 insertions(+), 370 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index bb7b023c2d33..26257ba1527e 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -44,12 +44,12 @@
>  /**
>   * DOC: Reservation Object Overview
>   *
> - * The reservation object provides a mechanism to manage shared and
> - * exclusive fences associated with a buffer.  A reservation object
> - * can have attached one exclusive fence (normally associated with
> - * write operations) or N shared fences (read operations).  The RCU
> - * mechanism is used to protect read access to fences from locked
> - * write-side updates.
> + * The reservation object provides a mechanism to manage a container of
> + * dma_fence object associated with a resource. A reservation object
> + * can have any number of fences attaches to it. Each fence carring an usage
								carries

> + * parameter determining how the operation represented by the fence is using the
> + * resource. The RCU mechanism is used to protect read access to fences from
> + * locked write-side updates.
>   *
>   * See struct dma_resv for more details.
>   */
> @@ -57,29 +57,74 @@
>  DEFINE_WD_CLASS(reservation_ww_class);
>  EXPORT_SYMBOL(reservation_ww_class);
>  
> +/* Mask for the lower fence pointer bits */
> +#define DMA_RESV_LIST_MASK	0x3
> +
>  struct dma_resv_list {
>  	struct rcu_head rcu;
> -	u32 shared_count, shared_max;
> -	struct dma_fence __rcu *shared[];
> +	u32 num_fences, max_fences;
> +	struct dma_fence __rcu *table[];
>  };
>  
> +/**
> + * dma_resv_list_entry - extract fence and usage from a list entry
> + * @list: the list to extract and entry from
> + * @index: which entry we want
> + * @resv: optional dma_resv obj for lockdep check that the access is allowed
> + * @fence: the resulting fence
> + * @usage: the resulting usage
> + *
> + * Extract the fence and usage flags from an RCU protected entry in the list.
> + */

No kerneldoc for static helpers which are internal to .c files, please. I was
going a bit "wtf, why would you export such a bad internals-heavy interface?"
until I realized it's only used in dma-resv.c.

> +static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index,
> +				struct dma_resv *resv, struct dma_fence **fence,
> +				enum dma_resv_usage *usage)
> +{
> +	long tmp;
> +
> +	tmp = (long)rcu_dereference_check(list->table[index],
> +					  resv ? dma_resv_held(resv) : true);
> +	*fence = (struct dma_fence *)(tmp & ~DMA_RESV_LIST_MASK);
> +	if (usage)
> +		*usage = tmp & DMA_RESV_LIST_MASK;
> +}
> +
> +/**
> + * dma_resv_list_set - set fence and usage at a specific index
> + * @list: the list to modify
> + * @index: where to make the change
> + * @fence: the fence to set
> + * @usage: the usage to set
> + *
> + * Set the fence and usage flags at the specific index in the list.
> + */

Same here.

The kerneldoc we write has driver authors as target audience, so anything
that's too much internals should only be a comment.
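
I.e. roughly (sketch only):

	/* Extract the fence and usage flags from an RCU protected entry in the
	 * list. Purely internal to dma-resv.c, hence no kerneldoc.
	 */
	static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index,
					struct dma_resv *resv, struct dma_fence **fence,
					enum dma_resv_usage *usage)

instead of the /** header.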

> +static void dma_resv_list_set(struct dma_resv_list *list,
> +			      unsigned int index,
> +			      struct dma_fence *fence,
> +			      enum dma_resv_usage usage)
> +{
> +	long tmp = ((long)fence) | usage;
> +
> +	RCU_INIT_POINTER(list->table[index], (struct dma_fence *)tmp);
> +}
> +
>  /**
>   * dma_resv_list_alloc - allocate fence list
> - * @shared_max: number of fences we need space for
> + * @max_fences: number of fences we need space for
>   *
>   * Allocate a new dma_resv_list and make sure to correctly initialize
> - * shared_max.
> + * max_fences.
>   */

Maybe do a cleanup of all these :-)

> -static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max)
> +static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences)
>  {
>  	struct dma_resv_list *list;
>  
> -	list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL);
> +	list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL);
>  	if (!list)
>  		return NULL;
>  
> -	list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) /
> -		sizeof(*list->shared);
> +	list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) /
> +		sizeof(*list->table);
>  
>  	return list;
>  }
> @@ -97,9 +142,12 @@ static void dma_resv_list_free(struct dma_resv_list *list)
>  	if (!list)
>  		return;
>  
> -	for (i = 0; i < list->shared_count; ++i)
> -		dma_fence_put(rcu_dereference_protected(list->shared[i], true));
> +	for (i = 0; i < list->num_fences; ++i) {
> +		struct dma_fence *fence;
>  
> +		dma_resv_list_entry(list, i, NULL, &fence, NULL);
> +		dma_fence_put(fence);
> +	}
>  	kfree_rcu(list, rcu);
>  }
>  
> @@ -112,8 +160,7 @@ void dma_resv_init(struct dma_resv *obj)
>  	ww_mutex_init(&obj->lock, &reservation_ww_class);
>  	seqcount_ww_mutex_init(&obj->seq, &obj->lock);
>  
> -	RCU_INIT_POINTER(obj->fence, NULL);
> -	RCU_INIT_POINTER(obj->fence_excl, NULL);
> +	RCU_INIT_POINTER(obj->fences, NULL);
>  }
>  EXPORT_SYMBOL(dma_resv_init);
>  
> @@ -123,46 +170,31 @@ EXPORT_SYMBOL(dma_resv_init);
>   */
>  void dma_resv_fini(struct dma_resv *obj)
>  {
> -	struct dma_resv_list *fobj;
> -	struct dma_fence *excl;
> -
>  	/*
>  	 * This object should be dead and all references must have
>  	 * been released to it, so no need to be protected with rcu.
>  	 */
> -	excl = rcu_dereference_protected(obj->fence_excl, 1);
> -	if (excl)
> -		dma_fence_put(excl);
> -
> -	fobj = rcu_dereference_protected(obj->fence, 1);
> -	dma_resv_list_free(fobj);
> +	dma_resv_list_free(rcu_dereference_protected(obj->fences, true));
>  	ww_mutex_destroy(&obj->lock);
>  }
>  EXPORT_SYMBOL(dma_resv_fini);
>  
> -static inline struct dma_fence *
> -dma_resv_excl_fence(struct dma_resv *obj)
> +static inline struct dma_resv_list *dma_resv_fences_list(struct dma_resv *obj)
>  {
> -       return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
> -}
> -
> -static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
> -{
> -	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
> +	return rcu_dereference_check(obj->fences, dma_resv_held(obj));
>  }
>  
>  /**
> - * dma_resv_reserve_fences - Reserve space to add shared fences to
> - * a dma_resv.
> + * dma_resv_reserve_fences - Reserve space to add fences to a dma_resv object.
>   * @obj: reservation object
>   * @num_fences: number of fences we want to add
>   *
> - * Should be called before dma_resv_add_shared_fence().  Must
> - * be called with @obj locked through dma_resv_lock().
> + * Should be called before dma_resv_add_fence().  Must be called with @obj
> + * locked through dma_resv_lock().
>   *
>   * Note that the preallocated slots need to be re-reserved if @obj is unlocked
> - * at any time before calling dma_resv_add_shared_fence(). This is validated
> - * when CONFIG_DEBUG_MUTEXES is enabled.
> + * at any time before calling dma_resv_add_fence(). This is validated when
> + * CONFIG_DEBUG_MUTEXES is enabled.
>   *
>   * RETURNS
>   * Zero for success, or -errno
> @@ -174,11 +206,11 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>  
>  	dma_resv_assert_held(obj);
>  
> -	old = dma_resv_shared_list(obj);
> -	if (old && old->shared_max) {
> -		if ((old->shared_count + num_fences) <= old->shared_max)
> +	old = dma_resv_fences_list(obj);
> +	if (old && old->max_fences) {
> +		if ((old->num_fences + num_fences) <= old->max_fences)
>  			return 0;
> -		max = max(old->shared_count + num_fences, old->shared_max * 2);
> +		max = max(old->num_fences + num_fences, old->max_fences * 2);
>  	} else {
>  		max = max(4ul, roundup_pow_of_two(num_fences));
>  	}
> @@ -193,27 +225,27 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>  	 * references from the old struct are carried over to
>  	 * the new.
>  	 */
> -	for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) {
> +	for (i = 0, j = 0, k = max; i < (old ? old->num_fences : 0); ++i) {
> +		enum dma_resv_usage usage;
>  		struct dma_fence *fence;
>  
> -		fence = rcu_dereference_protected(old->shared[i],
> -						  dma_resv_held(obj));
> +		dma_resv_list_entry(old, i, obj, &fence, &usage);
>  		if (dma_fence_is_signaled(fence))
> -			RCU_INIT_POINTER(new->shared[--k], fence);
> +			RCU_INIT_POINTER(new->table[--k], fence);
>  		else
> -			RCU_INIT_POINTER(new->shared[j++], fence);
> +			dma_resv_list_set(new, j++, fence, usage);
>  	}
> -	new->shared_count = j;
> +	new->num_fences = j;
>  
>  	/*
>  	 * We are not changing the effective set of fences here so can
>  	 * merely update the pointer to the new array; both existing
>  	 * readers and new readers will see exactly the same set of
> -	 * active (unsignaled) shared fences. Individual fences and the
> +	 * active (unsignaled) fences. Individual fences and the
>  	 * old array are protected by RCU and so will not vanish under
>  	 * the gaze of the rcu_read_lock() readers.
>  	 */
> -	rcu_assign_pointer(obj->fence, new);
> +	rcu_assign_pointer(obj->fences, new);
>  
>  	if (!old)
>  		return 0;
> @@ -222,7 +254,7 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>  	for (i = k; i < max; ++i) {
>  		struct dma_fence *fence;
>  
> -		fence = rcu_dereference_protected(new->shared[i],
> +		fence = rcu_dereference_protected(new->table[i],
>  						  dma_resv_held(obj));
>  		dma_fence_put(fence);
>  	}
> @@ -234,37 +266,39 @@ EXPORT_SYMBOL(dma_resv_reserve_fences);
>  
>  #ifdef CONFIG_DEBUG_MUTEXES
>  /**
> - * dma_resv_reset_shared_max - reset shared fences for debugging
> + * dma_resv_reset_max_fences - reset fences for debugging
>   * @obj: the dma_resv object to reset
>   *
> - * Reset the number of pre-reserved shared slots to test that drivers do
> + * Reset the number of pre-reserved fence slots to test that drivers do
>   * correct slot allocation using dma_resv_reserve_fences(). See also
> - * &dma_resv_list.shared_max.
> + * &dma_resv_list.max_fences.
>   */
> -void dma_resv_reset_shared_max(struct dma_resv *obj)
> +void dma_resv_reset_max_fences(struct dma_resv *obj)
>  {
> -	struct dma_resv_list *fences = dma_resv_shared_list(obj);
> +	struct dma_resv_list *fences = dma_resv_fences_list(obj);
>  
>  	dma_resv_assert_held(obj);
>  
> -	/* Test shared fence slot reservation */
> +	/* Test fence slot reservation */
>  	if (fences)
> -		fences->shared_max = fences->shared_count;
> +		fences->max_fences = fences->num_fences;
>  }
> -EXPORT_SYMBOL(dma_resv_reset_shared_max);
> +EXPORT_SYMBOL(dma_resv_reset_max_fences);
>  #endif
>  
>  /**
> - * dma_resv_add_shared_fence - Add a fence to a shared slot
> + * dma_resv_add_fence - Add a fence to the dma_resv obj
>   * @obj: the reservation object
> - * @fence: the shared fence to add
> + * @fence: the fence to add
> + * @usage: how the fence is used, see enum dma_resv_usage
>   *
> - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
> + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
>   * dma_resv_reserve_fences() has been called.
>   *
>   * See also &dma_resv.fence for a discussion of the semantics.
>   */
> -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
> +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
> +			enum dma_resv_usage usage)
>  {
>  	struct dma_resv_list *fobj;
>  	struct dma_fence *old;
> @@ -274,44 +308,45 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>  
>  	dma_resv_assert_held(obj);
>  
> -	/* Drivers should not add containers here, instead add each fence
> -	 * individually.
> +	/* TODO: Drivers should not add containers here, instead add each fence
> +	 * individually. Disabled for now until we cleaned up amdgpu/ttm.
>  	 */
> -	WARN_ON(dma_fence_is_container(fence));
> +	/* WARN_ON(dma_fence_is_container(fence)); */

Uh this looks like it's a misplaced hack?

If you do need it and can't get rid of it with patch reordering, then I
think it needs to be split out for extra attention.
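
I.e. if at all possible keep

	WARN_ON(dma_fence_is_container(fence));

as-is in this patch, and make the amdgpu/ttm cleanup plus any temporary
disabling of the check its own patch.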

>  
> -	fobj = dma_resv_shared_list(obj);
> -	count = fobj->shared_count;
> +	fobj = dma_resv_fences_list(obj);
> +	count = fobj->num_fences;
>  
>  	write_seqcount_begin(&obj->seq);
>  
>  	for (i = 0; i < count; ++i) {
> +		enum dma_resv_usage old_usage;
>  
> -		old = rcu_dereference_protected(fobj->shared[i],
> -						dma_resv_held(obj));
> -		if (old->context == fence->context ||
> +		dma_resv_list_entry(fobj, i, obj, &old, &old_usage);
> +		if ((old->context == fence->context && old_usage >= usage) ||
>  		    dma_fence_is_signaled(old))
>  			goto replace;
>  	}
>  
> -	BUG_ON(fobj->shared_count >= fobj->shared_max);
> +	BUG_ON(fobj->num_fences >= fobj->max_fences);
>  	old = NULL;
>  	count++;
>  
>  replace:
> -	RCU_INIT_POINTER(fobj->shared[i], fence);
> -	/* pointer update must be visible before we extend the shared_count */
> -	smp_store_mb(fobj->shared_count, count);
> +	dma_resv_list_set(fobj, i, fence, usage);
> +	/* pointer update must be visible before we extend the num_fences */
> +	smp_store_mb(fobj->num_fences, count);
>  
>  	write_seqcount_end(&obj->seq);
>  	dma_fence_put(old);
>  }
> -EXPORT_SYMBOL(dma_resv_add_shared_fence);
> +EXPORT_SYMBOL(dma_resv_add_fence);
>  
>  /**
>   * dma_resv_replace_fences - replace fences in the dma_resv obj
>   * @obj: the reservation object
>   * @context: the context of the fences to replace
>   * @replacement: the new fence to use instead
> + * @usage: how the new fence is used, see enum dma_resv_usage
>   *
>   * Replace fences with a specified context with a new fence. Only valid if the
>   * operation represented by the original fence has no longer access to the
> @@ -321,107 +356,72 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence);
>   * update fence which makes the resource inaccessible.
>   */
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> -			     struct dma_fence *replacement)
> +			     struct dma_fence *replacement,
> +			     enum dma_resv_usage usage)
>  {
>  	struct dma_resv_list *list;
> -	struct dma_fence *old;
>  	unsigned int i;
>  
>  	dma_resv_assert_held(obj);
>  
> +	list = dma_resv_fences_list(obj);
>  	write_seqcount_begin(&obj->seq);
> +	for (i = 0; list && i < list->num_fences; ++i) {
> +		struct dma_fence *old;
>  
> -	old = dma_resv_excl_fence(obj);
> -	if (old->context == context) {
> -		RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
> -		dma_fence_put(old);
> -	}
> -
> -	list = dma_resv_shared_list(obj);
> -	for (i = 0; list && i < list->shared_count; ++i) {
> -		old = rcu_dereference_protected(list->shared[i],
> -						dma_resv_held(obj));
> +		dma_resv_list_entry(list, i, obj, &old, NULL);
>  		if (old->context != context)
>  			continue;
>  
> -		rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
> +		dma_resv_list_set(list, i, replacement, usage);
>  		dma_fence_put(old);
>  	}
> -
>  	write_seqcount_end(&obj->seq);
>  }
>  EXPORT_SYMBOL(dma_resv_replace_fences);
>  
>  /**

This is marked up as a kerneldoc comment, but it now documents a static
helper and so shouldn't be kerneldoc. Needs to be fixed. Probably good to go
through the entire file once and ditch all the kerneldoc for static
functions here.

> - * dma_resv_add_excl_fence - Add an exclusive fence.
> - * @obj: the reservation object
> - * @fence: the exclusive fence to add
> + * dma_resv_iter_restart_unlocked - restart the unlocked iterator
> + * @cursor: The dma_resv_iter object to restart
>   *
> - * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock().
> - * See also &dma_resv.fence_excl for a discussion of the semantics.
> + * Restart the unlocked iteration by initializing the cursor object.
>   */
> -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
> -{
> -	struct dma_fence *old_fence = dma_resv_excl_fence(obj);
> -
> -	dma_resv_assert_held(obj);
> -
> -	dma_fence_get(fence);
> -
> -	write_seqcount_begin(&obj->seq);
> -	/* write_seqcount_begin provides the necessary memory barrier */
> -	RCU_INIT_POINTER(obj->fence_excl, fence);
> -	write_seqcount_end(&obj->seq);
> -
> -	dma_fence_put(old_fence);
> -}
> -EXPORT_SYMBOL(dma_resv_add_excl_fence);
> -
> -/* Restart the iterator by initializing all the necessary fields, but not the
> - * relation to the dma_resv object. */
>  static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
>  {
>  	cursor->seq = read_seqcount_begin(&cursor->obj->seq);
> -	cursor->index = -1;
> -	cursor->shared_count = 0;
> -	if (cursor->usage >= DMA_RESV_USAGE_READ) {
> -		cursor->fences = dma_resv_shared_list(cursor->obj);
> -		if (cursor->fences)
> -			cursor->shared_count = cursor->fences->shared_count;
> -	} else {
> -		cursor->fences = NULL;
> -	}
> +	cursor->index = 0;
> +	cursor->num_fences = 0;
> +	cursor->fences = dma_resv_fences_list(cursor->obj);
> +	if (cursor->fences)
> +		cursor->num_fences = cursor->fences->num_fences;
>  	cursor->is_restarted = true;
>  }
>  
>  /* Walk to the next not signaled fence and grab a reference to it */
>  static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
>  {
> -	struct dma_resv *obj = cursor->obj;
> +	if (!cursor->fences)
> +		return;
>  
>  	do {
>  		/* Drop the reference from the previous round */
>  		dma_fence_put(cursor->fence);
>  
> -		if (cursor->index == -1) {
> -			cursor->fence = dma_resv_excl_fence(obj);
> -			cursor->index++;
> -			if (!cursor->fence)
> -				continue;
> -
> -		} else if (!cursor->fences ||
> -			   cursor->index >= cursor->shared_count) {
> +		if (cursor->index >= cursor->num_fences) {
>  			cursor->fence = NULL;
>  			break;
>  
> -		} else {
> -			struct dma_resv_list *fences = cursor->fences;
> -			unsigned int idx = cursor->index++;
> -
> -			cursor->fence = rcu_dereference(fences->shared[idx]);
>  		}
> +
> +		dma_resv_list_entry(cursor->fences, cursor->index++,
> +				    cursor->obj, &cursor->fence,
> +				    &cursor->fence_usage);
>  		cursor->fence = dma_fence_get_rcu(cursor->fence);
> -		if (!cursor->fence || !dma_fence_is_signaled(cursor->fence))
> +		if (!cursor->fence)
> +			break;
> +
> +		if (!dma_fence_is_signaled(cursor->fence) &&
> +		    cursor->usage >= cursor->fence_usage)
>  			break;
>  	} while (true);
>  }
> @@ -496,15 +496,9 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
>  	dma_resv_assert_held(cursor->obj);
>  
>  	cursor->index = 0;
> -	if (cursor->usage >= DMA_RESV_USAGE_READ)
> -		cursor->fences = dma_resv_shared_list(cursor->obj);
> -	else
> -		cursor->fences = NULL;
> -
> -	fence = dma_resv_excl_fence(cursor->obj);
> -	if (!fence)
> -		fence = dma_resv_iter_next(cursor);
> +	cursor->fences = dma_resv_fences_list(cursor->obj);
>  
> +	fence = dma_resv_iter_next(cursor);
>  	cursor->is_restarted = true;
>  	return fence;
>  }
> @@ -519,17 +513,17 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first);
>   */
>  struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
>  {
> -	unsigned int idx;
> +	struct dma_fence *fence;
>  
>  	dma_resv_assert_held(cursor->obj);
>  
>  	cursor->is_restarted = false;
> -	if (!cursor->fences || cursor->index >= cursor->fences->shared_count)
> +	if (!cursor->fences || cursor->index >= cursor->fences->num_fences)
>  		return NULL;
>  
> -	idx = cursor->index++;
> -	return rcu_dereference_protected(cursor->fences->shared[idx],
> -					 dma_resv_held(cursor->obj));
> +	dma_resv_list_entry(cursor->fences, cursor->index++,
> +			    cursor->obj, &fence, &cursor->fence_usage);
> +	return fence;
>  }
>  EXPORT_SYMBOL_GPL(dma_resv_iter_next);
>  
> @@ -544,57 +538,43 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>  {
>  	struct dma_resv_iter cursor;
>  	struct dma_resv_list *list;
> -	struct dma_fence *f, *excl;
> +	struct dma_fence *f;
>  
>  	dma_resv_assert_held(dst);
>  
>  	list = NULL;
> -	excl = NULL;
>  
>  	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
>  	dma_resv_for_each_fence_unlocked(&cursor, f) {
>  
>  		if (dma_resv_iter_is_restarted(&cursor)) {
>  			dma_resv_list_free(list);
> -			dma_fence_put(excl);
> -
> -			if (cursor.shared_count) {
> -				list = dma_resv_list_alloc(cursor.shared_count);
> -				if (!list) {
> -					dma_resv_iter_end(&cursor);
> -					return -ENOMEM;
> -				}
>  
> -				list->shared_count = 0;
> -
> -			} else {
> -				list = NULL;
> +			list = dma_resv_list_alloc(cursor.num_fences);
> +			if (!list) {
> +				dma_resv_iter_end(&cursor);
> +				return -ENOMEM;
>  			}
> -			excl = NULL;
> +			list->num_fences = 0;
>  		}
>  
>  		dma_fence_get(f);
> -		if (dma_resv_iter_is_exclusive(&cursor))
> -			excl = f;
> -		else
> -			RCU_INIT_POINTER(list->shared[list->shared_count++], f);
> +		dma_resv_list_set(list, list->num_fences++, f,
> +				  dma_resv_iter_usage(&cursor));
>  	}
>  	dma_resv_iter_end(&cursor);
>  
>  	write_seqcount_begin(&dst->seq);
> -	excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst));
> -	list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst));
> +	list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst));
>  	write_seqcount_end(&dst->seq);
>  
>  	dma_resv_list_free(list);
> -	dma_fence_put(excl);
> -
>  	return 0;
>  }
>  EXPORT_SYMBOL(dma_resv_copy_fences);
>  
>  /**
> - * dma_resv_get_fences - Get an object's shared and exclusive
> + * dma_resv_get_fences - Get an object's fences
>   * fences without update side lock held
>   * @obj: the reservation object
>   * @usage: controls which fences to include, see enum dma_resv_usage.
> @@ -623,7 +603,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>  			while (*num_fences)
>  				dma_fence_put((*fences)[--(*num_fences)]);
>  
> -			count = cursor.shared_count + 1;
> +			count = cursor.num_fences + 1;
>  
>  			/* Eventually re-allocate the array */
>  			*fences = krealloc_array(*fences, count,
> @@ -695,8 +675,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
>  EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
>  
>  /**
> - * dma_resv_wait_timeout - Wait on reservation's objects
> - * shared and/or exclusive fences.
> + * dma_resv_wait_timeout - Wait on reservation's objects fences
>   * @obj: the reservation object
>   * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @intr: if true, do interruptible wait
> @@ -769,13 +748,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
>   */
>  void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq)
>  {
> +	static const char *usage[] = { "kernel", "write", "read", "other" };
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
>  	dma_resv_for_each_fence(&cursor, obj, true, fence) {
>  		seq_printf(seq, "\t%s fence:",
> -			   dma_resv_iter_is_exclusive(&cursor) ?
> -				"Exclusive" : "Shared");
> +			   usage[dma_resv_iter_usage(&cursor)]);
>  		dma_fence_describe(fence, seq);
>  	}
>  }
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index d097981061b1..d0f7c2bfd4f0 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -58,8 +58,9 @@ static int sanitycheck(void *arg)
>  	return r;
>  }
>  
> -static int test_signaling(void *arg, enum dma_resv_usage usage)
> +static int test_signaling(void *arg)
>  {
> +	enum dma_resv_usage usage = (unsigned long)arg;
>  	struct dma_resv resv;
>  	struct dma_fence *f;
>  	int r;
> @@ -81,11 +82,7 @@ static int test_signaling(void *arg, enum dma_resv_usage usage)
>  		goto err_unlock;
>  	}
>  
> -	if (usage >= DMA_RESV_USAGE_READ)
> -		dma_resv_add_shared_fence(&resv, f);
> -	else
> -		dma_resv_add_excl_fence(&resv, f);
> -
> +	dma_resv_add_fence(&resv, f, usage);
>  	if (dma_resv_test_signaled(&resv, usage)) {
>  		pr_err("Resv unexpectedly signaled\n");
>  		r = -EINVAL;
> @@ -105,18 +102,9 @@ static int test_signaling(void *arg, enum dma_resv_usage usage)
>  	return r;
>  }
>  
> -static int test_excl_signaling(void *arg)
> -{
> -	return test_signaling(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_signaling(void *arg)
> -{
> -	return test_signaling(arg, DMA_RESV_USAGE_READ);
> -}
> -
> -static int test_for_each(void *arg, enum dma_resv_usage usage)
> +static int test_for_each(void *arg)
>  {
> +	enum dma_resv_usage usage = (unsigned long)arg;
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *f, *fence;
>  	struct dma_resv resv;
> @@ -139,10 +127,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
>  		goto err_unlock;
>  	}
>  
> -	if (usage >= DMA_RESV_USAGE_READ)
> -		dma_resv_add_shared_fence(&resv, f);
> -	else
> -		dma_resv_add_excl_fence(&resv, f);
> +	dma_resv_add_fence(&resv, f, usage);
>  
>  	r = -ENOENT;
>  	dma_resv_for_each_fence(&cursor, &resv, usage, fence) {
> @@ -156,8 +141,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
>  			r = -EINVAL;
>  			goto err_unlock;
>  		}
> -		if (dma_resv_iter_is_exclusive(&cursor) !=
> -		    (usage >= DMA_RESV_USAGE_READ)) {
> +		if (dma_resv_iter_usage(&cursor) != usage) {
>  			pr_err("Unexpected fence usage\n");
>  			r = -EINVAL;
>  			goto err_unlock;
> @@ -177,18 +161,9 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
>  	return r;
>  }
>  
> -static int test_excl_for_each(void *arg)
> -{
> -	return test_for_each(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_for_each(void *arg)
> -{
> -	return test_for_each(arg, DMA_RESV_USAGE_READ);
> -}
> -
> -static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
> +static int test_for_each_unlocked(void *arg)
>  {
> +	enum dma_resv_usage usage = (unsigned long)arg;
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *f, *fence;
>  	struct dma_resv resv;
> @@ -212,10 +187,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>  		goto err_free;
>  	}
>  
> -	if (usage >= DMA_RESV_USAGE_READ)
> -		dma_resv_add_shared_fence(&resv, f);
> -	else
> -		dma_resv_add_excl_fence(&resv, f);
> +	dma_resv_add_fence(&resv, f, usage);
>  	dma_resv_unlock(&resv);
>  
>  	r = -ENOENT;
> @@ -235,8 +207,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>  			r = -EINVAL;
>  			goto err_iter_end;
>  		}
> -		if (dma_resv_iter_is_exclusive(&cursor) !=
> -		    (usage >= DMA_RESV_USAGE_READ)) {
> +		if (dma_resv_iter_usage(&cursor) != usage) {
>  			pr_err("Unexpected fence usage\n");
>  			r = -EINVAL;
>  			goto err_iter_end;
> @@ -262,18 +233,9 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>  	return r;
>  }
>  
> -static int test_excl_for_each_unlocked(void *arg)
> -{
> -	return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_for_each_unlocked(void *arg)
> -{
> -	return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
> -}
> -
> -static int test_get_fences(void *arg, enum dma_resv_usage usage)
> +static int test_get_fences(void *arg)
>  {
> +	enum dma_resv_usage usage = (unsigned long)arg;
>  	struct dma_fence *f, **fences = NULL;
>  	struct dma_resv resv;
>  	int r, i;
> @@ -296,10 +258,7 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage)
>  		goto err_resv;
>  	}
>  
> -	if (usage >= DMA_RESV_USAGE_READ)
> -		dma_resv_add_shared_fence(&resv, f);
> -	else
> -		dma_resv_add_excl_fence(&resv, f);
> +	dma_resv_add_fence(&resv, f, usage);
>  	dma_resv_unlock(&resv);
>  
>  	r = dma_resv_get_fences(&resv, usage, &i, &fences);
> @@ -324,30 +283,24 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage)
>  	return r;
>  }
>  
> -static int test_excl_get_fences(void *arg)
> -{
> -	return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_get_fences(void *arg)
> -{
> -	return test_get_fences(arg, DMA_RESV_USAGE_READ);
> -}
> -
>  int dma_resv(void)
>  {
>  	static const struct subtest tests[] = {
>  		SUBTEST(sanitycheck),
> -		SUBTEST(test_excl_signaling),
> -		SUBTEST(test_shared_signaling),
> -		SUBTEST(test_excl_for_each),
> -		SUBTEST(test_shared_for_each),
> -		SUBTEST(test_excl_for_each_unlocked),
> -		SUBTEST(test_shared_for_each_unlocked),
> -		SUBTEST(test_excl_get_fences),
> -		SUBTEST(test_shared_get_fences),
> +		SUBTEST(test_signaling),
> +		SUBTEST(test_for_each),
> +		SUBTEST(test_for_each_unlocked),
> +		SUBTEST(test_get_fences),
>  	};
> +	enum dma_resv_usage usage;
> +	int r;
>  
>  	spin_lock_init(&fence_lock);
> -	return subtests(tests, NULL);
> +	for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ;
> +	     ++usage) {
> +		r = subtests(tests, (void *)(unsigned long)usage);
> +		if (r)
> +			return r;
> +	}
> +	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 7de8f67f7dde..ab5d6b630a49 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>  	 */
>  	replacement = dma_fence_get_stub();
>  	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
> -				replacement);
> +				replacement, DMA_RESV_USAGE_READ);
>  	dma_fence_put(replacement);
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 2bf909a4242a..1c039db976a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -54,8 +54,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>  	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
>  	p->uf_entry.priority = 0;
>  	p->uf_entry.tv.bo = &bo->tbo;
> -	/* One for TTM and one for the CS job */
> -	p->uf_entry.tv.num_shared = 2;
> +	/* One for TTM and two for the CS job */
> +	p->uf_entry.tv.num_shared = 3;
>  
>  	drm_gem_object_put(gobj);
>  
> @@ -1284,7 +1284,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  			break;
>  		}
>  		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
> -		rcu_assign_pointer(resv->fence_excl, &chain->base);
> +		dma_resv_add_fence(resv, &chain->base, DMA_RESV_USAGE_WRITE);
>  		e->chain = NULL;
>  	}
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 0a843cc54945..9085a6b1ad56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1393,10 +1393,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>  		return;
>  	}
>  
> -	if (shared)
> -		dma_resv_add_shared_fence(resv, fence);
> -	else
> -		dma_resv_add_excl_fence(resv, fence);
> +	dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
> +			   DMA_RESV_USAGE_WRITE);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index d7cd26dfaf8a..0cc036d93afc 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -203,14 +203,10 @@ static void submit_attach_object_fences(struct etnaviv_gem_submit *submit)
>  
>  	for (i = 0; i < submit->nr_bos; i++) {
>  		struct drm_gem_object *obj = &submit->bos[i].obj->base;
> +		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
>  
> -		if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
> -			dma_resv_add_excl_fence(obj->resv,
> -							  submit->out_fence);
> -		else
> -			dma_resv_add_shared_fence(obj->resv,
> -							    submit->out_fence);
> -
> +		dma_resv_add_fence(obj->resv, submit->out_fence, write ?
> +				   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
>  		submit_unlock_object(submit, i);
>  	}
>  }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> index 14a1c0ad8c3c..e7ae94ee1b44 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> @@ -148,12 +148,13 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  		if (dma_resv_iter_is_restarted(&cursor))
>  			args->busy = 0;
>  
> -		if (dma_resv_iter_is_exclusive(&cursor))
> -			/* Translate the exclusive fence to the READ *and* WRITE engine */
> -			args->busy |= busy_check_writer(fence);
> -		else
> -			/* Translate shared fences to READ set of engines */
> -			args->busy |= busy_check_reader(fence);
> +		/* Translate read fences to READ set of engines */
> +		args->busy |= busy_check_reader(fence);
> +	}
> +	dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_WRITE);
> +	dma_resv_for_each_fence_unlocked(&cursor, fence) {
> +		/* Translate the write fences to the READ *and* WRITE engine */
> +		args->busy |= busy_check_writer(fence);
>  	}
>  	dma_resv_iter_end(&cursor);
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> index 8a2223eb0ba9..887cb6b71ae4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> @@ -114,7 +114,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  						obj->base.resv, NULL, true,
>  						i915_fence_timeout(i915),
>  						I915_FENCE_GFP);
> -		dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
> +		dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
> +				   DMA_RESV_USAGE_WRITE);
>  		dma_fence_work_commit(&clflush->base);
>  		/*
>  		 * We must have successfully populated the pages(since we are
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> index 4de6500f3c55..e4a232e22f9d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> @@ -622,9 +622,8 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
>  	if (IS_ERR_OR_NULL(copy_fence))
>  		return PTR_ERR_OR_ZERO(copy_fence);
>  
> -	dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
> -	dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
> -
> +	dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
> +	dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ);
>  	dma_fence_put(copy_fence);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index fe9f89289418..52fd6705a518 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1640,7 +1640,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>  		}
>  
>  		if (fence) {
> -			dma_resv_add_excl_fence(vma->obj->base.resv, fence);
> +			dma_resv_add_fence(vma->obj->base.resv, fence,
> +					   DMA_RESV_USAGE_WRITE);
>  			obj->write_domain = I915_GEM_DOMAIN_RENDER;
>  			obj->read_domains = 0;
>  		}
> @@ -1652,7 +1653,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>  		}
>  
>  		if (fence) {
> -			dma_resv_add_shared_fence(vma->obj->base.resv, fence);
> +			dma_resv_add_fence(vma->obj->base.resv, fence,
> +					   DMA_RESV_USAGE_READ);
>  			obj->write_domain = 0;
>  		}
>  	}
> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> index 9435a3ca71c8..38caa7f78871 100644
> --- a/drivers/gpu/drm/lima/lima_gem.c
> +++ b/drivers/gpu/drm/lima/lima_gem.c
> @@ -366,7 +366,7 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
>  		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
>  			dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence);

Not very compile-tested it seems.

I think it'd be good to split this further:

- Add dma_resv_add_fence, which just adds either an exclusive or a shared
  fence (rough sketch below).
- Convert drivers, cc driver authors (this patch doesn't seem to have
  them).

I think the above two could also be a single patch, but it should also
work when split up even further.

- Remaining pieces of this patch to add the functional changes.

Also compile testing of all arm drivers I guess :-)
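
Purely as a sketch of what that first step could look like (assuming the
old dma_resv_add_excl_fence()/dma_resv_add_shared_fence() entry points
still exist at that point, which is not how this patch does it):

/* Transitional helper: map the usage parameter onto the old
 * exclusive/shared slots, so drivers can be converted one by one before
 * the container rework lands as a separate patch.
 */
static inline void dma_resv_add_fence(struct dma_resv *obj,
				      struct dma_fence *fence,
				      enum dma_resv_usage usage)
{
	if (usage >= DMA_RESV_USAGE_READ)
		dma_resv_add_shared_fence(obj, fence);
	else
		dma_resv_add_excl_fence(obj, fence);
}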

>  		else
> -			dma_resv_add_shared_fence(lima_bo_resv(bos[i]), fence);
> +			dma_resv_add_fence(lima_bo_resv(bos[i]), fence);
>  	}
>  
>  	drm_gem_unlock_reservations((struct drm_gem_object **)bos,
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 993dbcd7a586..2786913be00a 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -397,7 +397,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
>  		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
>  			dma_resv_add_excl_fence(obj->resv, submit->user_fence);
>  		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
> -			dma_resv_add_shared_fence(obj->resv, submit->user_fence);
> +			dma_resv_add_fence(obj->resv, submit->user_fence);
>  	}
>  }
>  
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index c6bb4dbcd735..05076e530e7d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -1308,10 +1308,11 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool excl
>  {
>  	struct dma_resv *resv = nvbo->bo.base.resv;
>  
> -	if (exclusive)
> -		dma_resv_add_excl_fence(resv, &fence->base);
> -	else if (fence)
> -		dma_resv_add_shared_fence(resv, &fence->base);
> +	if (!fence)
> +		return;
> +
> +	dma_resv_add_fence(resv, &fence->base, exclusive ?
> +			   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
>  }
>  
>  static void
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index d5e81ccee01c..7f01dcf81fab 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -360,9 +360,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
>  		dma_resv_for_each_fence(&cursor, resv,
>  					dma_resv_usage_rw(exclusive),
>  					fence) {
> +			enum dma_resv_usage usage;
>  			struct nouveau_fence *f;
>  
> -			if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
> +			usage = dma_resv_iter_usage(&cursor);
> +			if (i == 0 && usage == DMA_RESV_USAGE_WRITE)
>  				continue;
>  
>  			f = nouveau_local_fence(fence, chan->drm);
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index cde1e8ddaeaa..368d26da0d6a 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -429,7 +429,8 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>  	list_for_each_entry(entry, &release->bos, head) {
>  		bo = entry->bo;
>  
> -		dma_resv_add_shared_fence(bo->base.resv, &release->base);
> +		dma_resv_add_fence(bo->base.resv, &release->base,
> +				   DMA_RESV_USAGE_READ);
>  		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index afca4bf59a8d..382121c26f81 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -792,8 +792,6 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
>  		return;
>  	}
>  
> -	if (shared)
> -		dma_resv_add_shared_fence(resv, &fence->base);
> -	else
> -		dma_resv_add_excl_fence(resv, &fence->base);
> +	dma_resv_add_fence(resv, &fence->base, shared ?
> +			   DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
>  }
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index d4b2695606e2..6014c363d6e6 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>  		return ret;
>  	}
>  
> -	dma_resv_add_shared_fence(bo->base.resv, fence);
> +	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
>  
>  	ret = dma_resv_reserve_fences(bo->base.resv, 1);
>  	if (unlikely(ret)) {
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 862d2f22412a..49689c7c8078 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -508,7 +508,8 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
>  	if (ret)
>  		return ret;
>  
> -	dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
> +	dma_resv_add_fence(&ghost_obj->base._resv, fence,
> +			   DMA_RESV_USAGE_WRITE);
>  
>  	/**
>  	 * If we're not moving to fixed memory, the TTM object
> @@ -562,7 +563,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
>  	struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type);
>  	int ret = 0;
>  
> -	dma_resv_add_excl_fence(bo->base.resv, fence);
> +	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
>  	if (!evict)
>  		ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt);
>  	else if (!from->use_tt && pipeline)
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 789c645f004e..0eb995d25df1 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -154,10 +154,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>  	list_for_each_entry(entry, list, head) {
>  		struct ttm_buffer_object *bo = entry->bo;
>  
> -		if (entry->num_shared)
> -			dma_resv_add_shared_fence(bo->base.resv, fence);
> -		else
> -			dma_resv_add_excl_fence(bo->base.resv, fence);
> +		dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ?
> +				   DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
>  		ttm_bo_move_to_lru_tail_unlocked(bo);
>  		dma_resv_unlock(bo->base.resv);
>  	}
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 961812d33827..2352e9640922 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -550,8 +550,8 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
>  
>  	for (i = 0; i < job->bo_count; i++) {
>  		/* XXX: Use shared fences for read-only objects. */
> -		dma_resv_add_excl_fence(job->bo[i]->resv,
> -					job->done_fence);
> +		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
> +				   DMA_RESV_USAGE_WRITE);
>  	}
>  
>  	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 594bd6bb00d2..38550317e025 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -546,7 +546,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
>  		bo = to_vc4_bo(&exec->bo[i]->base);
>  		bo->seqno = seqno;
>  
> -		dma_resv_add_shared_fence(bo->base.base.resv, exec->fence);
> +		dma_resv_add_fence(bo->base.base.resv, exec->fence);
>  	}
>  
>  	list_for_each_entry(bo, &exec->unref_list, unref_head) {
> diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
> index 91fc4940c65a..c2a879734d40 100644
> --- a/drivers/gpu/drm/vgem/vgem_fence.c
> +++ b/drivers/gpu/drm/vgem/vgem_fence.c
> @@ -161,12 +161,9 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>  	/* Expose the fence via the dma-buf */
>  	dma_resv_lock(resv, NULL);
>  	ret = dma_resv_reserve_fences(resv, 1);
> -	if (!ret) {
> -		if (arg->flags & VGEM_FENCE_WRITE)
> -			dma_resv_add_excl_fence(resv, fence);
> -		else
> -			dma_resv_add_shared_fence(resv, fence);
> -	}
> +	if (!ret)
> +		dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ?
> +				   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
>  	dma_resv_unlock(resv);
>  
>  	/* Record the fence in our idr for later signaling */
> diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
> index 1820ca6cf673..580a78809836 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_gem.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
> @@ -250,7 +250,8 @@ void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs,
>  	int i;
>  
>  	for (i = 0; i < objs->nents; i++)
> -		dma_resv_add_excl_fence(objs->objs[i]->resv, fence);
> +		dma_resv_add_fence(objs->objs[i]->resv, fence,
> +				   DMA_RESV_USAGE_WRITE);
>  }
>  
>  void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index b96884f7d03d..bec50223efe5 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -758,7 +758,8 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
>  
>  	ret = dma_resv_reserve_fences(bo->base.resv, 1);
>  	if (!ret)
> -		dma_resv_add_excl_fence(bo->base.resv, &fence->base);
> +		dma_resv_add_fence(bo->base.resv, &fence->base,
> +				   DMA_RESV_USAGE_WRITE);
>  	else
>  		/* Last resort fallback when we are OOM */
>  		dma_fence_wait(&fence->base, false);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 74083e62e19d..a8cfc1705d6a 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -393,16 +393,13 @@ struct dma_buf {
>  	 * e.g. exposed in `Implicit Fence Poll Support`_ must follow the
>  	 * below rules.
>  	 *
> -	 * - Drivers must add a shared fence through dma_resv_add_shared_fence()
> -	 *   for anything the userspace API considers a read access. This highly
> -	 *   depends upon the API and window system.
> +	 * - Drivers must add a read fence through dma_resv_add_fence() with the
> +	 *   DMA_RESV_USAGE_READ flag for anything the userspace API considers a
> +	 *   read access. This highly depends upon the API and window system.
>  	 *
> -	 * - Similarly drivers must set the exclusive fence through
> -	 *   dma_resv_add_excl_fence() for anything the userspace API considers
> -	 *   write access.
> -	 *
> -	 * - Drivers may just always set the exclusive fence, since that only
> -	 *   causes unecessarily synchronization, but no correctness issues.

Why did you drop this comment? We have a lot of drivers that are a bit
dumb in this regard, which is why I added the "being dumb and always
setting write usage is fine" line. That's still a valid statement, so why
remove it?
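
In other words, a driver that doesn't track read vs. write access can
simply over-synchronize (hypothetical snippet, only to illustrate the
dropped guidance):

/* No read/write tracking in this driver: always declare the access as a
 * write. That only costs unnecessary serialization, never correctness.
 */
static void foo_attach_fence(struct drm_gem_object *obj,
			     struct dma_fence *fence)
{
	dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_WRITE);
}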

> +	 * - Similarly drivers must add a write fence through
> +	 *   dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for
> +	 *   anything the userspace API considers write access.
>  	 *
>  	 * - Some drivers only expose a synchronous userspace API with no
>  	 *   pipelining across drivers. These do not set any fences for their
> @@ -413,7 +410,7 @@ struct dma_buf {
>  	 * Dynamic importers, see dma_buf_attachment_is_dynamic(), have
>  	 * additional constraints on how they set up fences:
>  	 *
> -	 * - Dynamic importers must obey the exclusive fence and wait for it to
> +	 * - Dynamic importers must obey the kernel fences and wait for them to
>  	 *   signal before allowing access to the buffer's underlying storage
>  	 *   through the device.
>  	 *
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 658674c4b7b9..ae0436d7e7b8 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -95,8 +95,8 @@ static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
>  /**
>   * struct dma_resv - a reservation object manages fences for a buffer
>   *
> - * There are multiple uses for this, with sometimes slightly different rules in
> - * how the fence slots are used.
> + * This is a container for dma_fence objects which needs to handle multiple use
> + * cases.

Uh, this still holds true? amdgpu still has (and always will have to, due
to it being uapi) the additional concept of a fence owner. Dropping this
cautious wording doesn't seem wise.

If you really want to, I think you should at least add a line that there
are additional complications in some drivers, like amdgpu's owner concept.

>   *
>   * One use is to synchronize cross-driver access to a struct dma_buf, either for
>   * dynamic buffer management or just to handle implicit synchronization between
> @@ -126,47 +126,22 @@ struct dma_resv {
>  	 * @seq:
>  	 *
>  	 * Sequence count for managing RCU read-side synchronization, allows
> -	 * read-only access to @fence_excl and @fence while ensuring we take a
> -	 * consistent snapshot.
> +	 * read-only access to @fences while ensuring we take a consistent
> +	 * snapshot.
>  	 */
>  	seqcount_ww_mutex_t seq;
>  
>  	/**
> -	 * @fence_excl:
> +	 * @fences:
>  	 *
> -	 * The exclusive fence, if there is one currently.
> +	 * Array of fences which where added to the dma_resv object
>  	 *
> -	 * To guarantee that no fences are lost, this new fence must signal
> -	 * only after the previous exclusive fence has signalled. If
> -	 * semantically only a new access is added without actually treating the
> -	 * previous one as a dependency the exclusive fences can be strung
> -	 * together using struct dma_fence_chain.
> -	 *
> -	 * Note that actual semantics of what an exclusive or shared fence mean
> -	 * is defined by the user, for reservation objects shared across drivers
> -	 * see &dma_buf.resv.
> -	 */
> -	struct dma_fence __rcu *fence_excl;
> -
> -	/**
> -	 * @fence:
> -	 *
> -	 * List of current shared fences.
> -	 *
> -	 * There are no ordering constraints of shared fences against the
> -	 * exclusive fence slot. If a waiter needs to wait for all access, it
> -	 * has to wait for both sets of fences to signal.
> -	 *
> -	 * A new fence is added by calling dma_resv_add_shared_fence(). Since
> -	 * this often needs to be done past the point of no return in command
> +	 * A new fence is added by calling dma_resv_add_fence(). Since this
> +	 * often needs to be done past the point of no return in command
>  	 * submission it cannot fail, and therefore sufficient slots need to be
>  	 * reserved by calling dma_resv_reserve_fences().
> -	 *
> -	 * Note that actual semantics of what an exclusive or shared fence mean
> -	 * is defined by the user, for reservation objects shared across drivers
> -	 * see &dma_buf.resv.
>  	 */
> -	struct dma_resv_list __rcu *fence;
> +	struct dma_resv_list __rcu *fences;
>  };
>  
>  /**
> @@ -191,6 +166,9 @@ struct dma_resv_iter {
>  	/** @fence: the currently handled fence */
>  	struct dma_fence *fence;
>  
> +	/** @fence_usage: the usage of the current fence */
> +	enum dma_resv_usage fence_usage;
> +
>  	/** @seq: sequence number to check for modifications */
>  	unsigned int seq;
>  
> @@ -200,8 +178,8 @@ struct dma_resv_iter {
>  	/** @fences: the shared fences; private, *MUST* not dereference  */
>  	struct dma_resv_list *fences;
>  
> -	/** @shared_count: number of shared fences */
> -	unsigned int shared_count;
> +	/** @num_fences: number of fences */
> +	unsigned int num_fences;
>  
>  	/** @is_restarted: true if this is the first returned fence */
>  	bool is_restarted;
> @@ -240,14 +218,15 @@ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
>  }
>  
>  /**
> - * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
> + * dma_resv_iter_usage - Return the usage of the current fence
>   * @cursor: the cursor of the current position
>   *
> - * Returns true if the currently returned fence is the exclusive one.
> + * Returns the usage of the currently processed fence.
>   */
> -static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
> +static inline enum dma_resv_usage
> +dma_resv_iter_usage(struct dma_resv_iter *cursor)
>  {
> -	return cursor->index == 0;
> +	return cursor->fence_usage;
>  }
>  
>  /**
> @@ -302,9 +281,9 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
>  #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
>  
>  #ifdef CONFIG_DEBUG_MUTEXES
> -void dma_resv_reset_shared_max(struct dma_resv *obj);
> +void dma_resv_reset_max_fences(struct dma_resv *obj);
>  #else
> -static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {}
> +static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {}
>  #endif
>  
>  /**
> @@ -450,17 +429,18 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj)
>   */
>  static inline void dma_resv_unlock(struct dma_resv *obj)
>  {
> -	dma_resv_reset_shared_max(obj);
> +	dma_resv_reset_max_fences(obj);
>  	ww_mutex_unlock(&obj->lock);
>  }
>  
>  void dma_resv_init(struct dma_resv *obj);
>  void dma_resv_fini(struct dma_resv *obj);
>  int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences);
> -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
> +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
> +			enum dma_resv_usage usage);
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> -			     struct dma_fence *fence);
> -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
> +			     struct dma_fence *fence,
> +			     enum dma_resv_usage usage);
>  int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>  			unsigned int *num_fences, struct dma_fence ***fences);
>  int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,

Feels a bit too unbaked yet to slap an r-b onto it.
-Daniel

> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 18/23] drm/amdgpu: remove dma_resv workaround
  2022-03-21 13:58 ` [PATCH 18/23] drm/amdgpu: remove dma_resv workaround Christian König
@ 2022-03-29 15:47   ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 15:47 UTC (permalink / raw)
  To: Christian König
  Cc: daniel.vetter, amd-gfx, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:51PM +0100, Christian König wrote:
> We can now add multiple writers to the dma_resv object.
> 
> Also enable the check for not adding containers in dma_resv.c again.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Cc: amd-gfx@lists.freedesktop.org

It's a bit much magic, but that's the entire point of your huge prep
series to be able to have all the fences on a dma-resv :-)
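
Rough illustration of why the chain workaround can go (sketch, not from
this patch, and assuming the slots were reserved beforehand with
dma_resv_reserve_fences()): two independent submissions against the same
BO can now simply both be tracked as writers:

static void foo_remember_writers(struct amdgpu_bo *bo,
				 struct dma_fence *job_a,
				 struct dma_fence *job_b)
{
	struct dma_resv *resv = bo->tbo.base.resv;

	/* both submissions are remembered as writers, no chain needed */
	dma_resv_add_fence(resv, job_a, DMA_RESV_USAGE_WRITE);
	dma_resv_add_fence(resv, job_b, DMA_RESV_USAGE_WRITE);
}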

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/dma-buf/dma-resv.c                  |  6 +--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 51 ++-------------------
>  3 files changed, 8 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 26257ba1527e..10d70812373c 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -308,10 +308,10 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
>  
>  	dma_resv_assert_held(obj);
>  
> -	/* TODO: Drivers should not add containers here, instead add each fence
> -	 * individually. Disabled for now until we cleaned up amdgpu/ttm.
> +	/* Drivers should not add containers here, instead add each fence
> +	 * individually.
>  	 */
> -	/* WARN_ON(dma_fence_is_container(fence)); */
> +	WARN_ON(dma_fence_is_container(fence));
>  
>  	fobj = dma_resv_fences_list(obj);
>  	count = fobj->num_fences;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 044b41f0bfd9..529d52a204cf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -34,7 +34,6 @@ struct amdgpu_fpriv;
>  struct amdgpu_bo_list_entry {
>  	struct ttm_validate_buffer	tv;
>  	struct amdgpu_bo_va		*bo_va;
> -	struct dma_fence_chain		*chain;
>  	uint32_t			priority;
>  	struct page			**user_pages;
>  	bool				user_invalidated;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c039db976a9..88009833f523 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -575,14 +575,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>  
>  		e->bo_va = amdgpu_vm_bo_find(vm, bo);
> -
> -		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
> -			e->chain = dma_fence_chain_alloc();
> -			if (!e->chain) {
> -				r = -ENOMEM;
> -				goto error_validate;
> -			}
> -		}
>  	}
>  
>  	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> @@ -633,13 +625,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>  	}
>  
>  error_validate:
> -	if (r) {
> -		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> -			dma_fence_chain_free(e->chain);
> -			e->chain = NULL;
> -		}
> +	if (r)
>  		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> -	}
>  out:
>  	return r;
>  }
> @@ -679,17 +666,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>  {
>  	unsigned i;
>  
> -	if (error && backoff) {
> -		struct amdgpu_bo_list_entry *e;
> -
> -		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
> -			dma_fence_chain_free(e->chain);
> -			e->chain = NULL;
> -		}
> -
> +	if (error && backoff)
>  		ttm_eu_backoff_reservation(&parser->ticket,
>  					   &parser->validated);
> -	}
>  
>  	for (i = 0; i < parser->num_post_deps; i++) {
>  		drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -1264,29 +1243,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  
>  	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>  
> -	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> -		struct dma_resv *resv = e->tv.bo->base.resv;
> -		struct dma_fence_chain *chain = e->chain;
> -		struct dma_resv_iter cursor;
> -		struct dma_fence *fence;
> -
> -		if (!chain)
> -			continue;
> -
> -		/*
> -		 * Work around dma_resv shortcommings by wrapping up the
> -		 * submission in a dma_fence_chain and add it as exclusive
> -		 * fence.
> -		 */
> -		dma_resv_for_each_fence(&cursor, resv,
> -					DMA_RESV_USAGE_WRITE,
> -					fence) {
> -			break;
> -		}
> -		dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
> -		dma_resv_add_fence(resv, &chain->base, DMA_RESV_USAGE_WRITE);
> -		e->chain = NULL;
> -	}
> +	/* Make sure all BOs are remembered as writers */
> +	amdgpu_bo_list_for_each_entry(e, p->bo_list)
> +		e->tv.num_shared = 0;
>  
>  	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>  	mutex_unlock(&p->adev->notifier_lock);
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 19/23] dma-buf: add DMA_RESV_USAGE_KERNEL v2
  2022-03-21 13:58 ` [PATCH 19/23] dma-buf: add DMA_RESV_USAGE_KERNEL v2 Christian König
@ 2022-03-29 15:53   ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 15:53 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:52PM +0100, Christian König wrote:
> Add an usage for kernel submissions. Waiting for those
> are mandatory for dynamic DMA-bufs.
> 
> v2: use "must" in documentation, fix whitespaces
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

I think it's not super great that you smash all the users of this into the
same patch, since there's not really an explanation for why each case is
right.

I'll leave the amd/radeon cases up to you, but I've added comments on the
ones where I couldn't convince myself they're the right thing to do. I
think a bit more splitting would be good.

The other parts lgtm now.
-Daniel

> ---
>  drivers/dma-buf/st-dma-resv.c                |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c      |  2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c |  6 ++++--
>  drivers/gpu/drm/i915/gem/i915_gem_clflush.c  |  2 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c         |  4 ++--
>  drivers/gpu/drm/radeon/radeon_uvd.c          |  2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                 |  2 +-
>  drivers/gpu/drm/ttm/ttm_bo_util.c            |  4 ++--
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c           |  2 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c     |  2 +-
>  drivers/infiniband/core/umem_dmabuf.c        |  2 +-
>  include/linux/dma-resv.h                     | 22 ++++++++++++++++++++
>  13 files changed, 39 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index d0f7c2bfd4f0..062b57d63fa6 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -296,7 +296,7 @@ int dma_resv(void)
>  	int r;
>  
>  	spin_lock_init(&fence_lock);
> -	for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ;
> +	for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ;
>  	     ++usage) {
>  		r = subtests(tests, (void *)(unsigned long)usage);
>  		if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 9085a6b1ad56..1618b6847c69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -765,7 +765,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
>  		return 0;
>  	}
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
>  				  false, MAX_SCHEDULE_TIMEOUT);
>  	if (r < 0)
>  		return r;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 33deb0df62fd..9e102080dad9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -1163,7 +1163,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>  
>  	if (direct) {
>  		r = dma_resv_wait_timeout(bo->tbo.base.resv,
> -					  DMA_RESV_USAGE_WRITE, false,
> +					  DMA_RESV_USAGE_KERNEL, false,
>  					  msecs_to_jiffies(10));
>  		if (r == 0)
>  			r = -ETIMEDOUT;
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index 0cc036d93afc..ab5249d55b32 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -185,9 +185,11 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>  			return ret;
>  
>  		if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
> -			continue;
> +			usage = DMA_RESV_USAGE_KERNEL;
> +		else
> +			usage = dma_resv_usage_rw(bo->flags &
> +						  ETNA_SUBMIT_BO_WRITE);
>  
> -		usage = dma_resv_usage_rw(bo->flags & ETNA_SUBMIT_BO_WRITE);
>  		ret = dma_resv_get_fences(robj, usage, &bo->nr_shared,
>  					  &bo->shared);
>  		if (ret)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> index 887cb6b71ae4..9a7104251cb3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> @@ -115,7 +115,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>  						i915_fence_timeout(i915),
>  						I915_FENCE_GFP);
>  		dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
> -				   DMA_RESV_USAGE_WRITE);
> +				   DMA_RESV_USAGE_KERNEL);

Note that there's some supremely questionable gunk to clean up in
i915_gem_execbuffer, but I guess that's for the gem team to sort out.

Please do make sure intel-gfx-ci does run this, since it might blow up in
funny ways.

>  		dma_fence_work_commit(&clflush->base);
>  		/*
>  		 * We must have successfully populated the pages(since we are
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 05076e530e7d..13deb6c70ba6 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -962,10 +962,10 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
>  	struct dma_fence *fence;
>  	int ret;
>  
> -	ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE,
> +	ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_KERNEL,
>  				     &fence);
>  	if (ret)
> -		dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE,
> +		dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL,
>  				      false, MAX_SCHEDULE_TIMEOUT);

This needs an ack from nouveau folks I think. Might be best to split it
out as a separate patch.

>  
>  	nv10_bo_put_tile_region(dev, *old_tile, fence);
> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
> index 4000ad2f39ba..488e78889dd6 100644
> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
> @@ -478,7 +478,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>  		return -EINVAL;
>  	}
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
>  				  false, MAX_SCHEDULE_TIMEOUT);
>  	if (r <= 0) {
>  		DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 6014c363d6e6..0f0aa96fb051 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>  		return ret;
>  	}
>  
> -	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
> +	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
>  
>  	ret = dma_resv_reserve_fences(bo->base.resv, 1);
>  	if (unlikely(ret)) {
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 49689c7c8078..98e1c804519e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -509,7 +509,7 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
>  		return ret;
>  
>  	dma_resv_add_fence(&ghost_obj->base._resv, fence,
> -			   DMA_RESV_USAGE_WRITE);
> +			   DMA_RESV_USAGE_KERNEL);
>  
>  	/**
>  	 * If we're not moving to fixed memory, the TTM object
> @@ -563,7 +563,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
>  	struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type);
>  	int ret = 0;
>  
> -	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
> +	dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
>  	if (!evict)
>  		ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt);
>  	else if (!from->use_tt && pipeline)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index bec50223efe5..408ede1f967f 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -759,7 +759,7 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
>  	ret = dma_resv_reserve_fences(bo->base.resv, 1);
>  	if (!ret)
>  		dma_resv_add_fence(bo->base.resv, &fence->base,
> -				   DMA_RESV_USAGE_WRITE);
> +				   DMA_RESV_USAGE_KERNEL);
>  	else
>  		/* Last resort fallback when we are OOM */
>  		dma_fence_wait(&fence->base, false);
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index 39081dbf9ac8..f999fdd927df 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -1167,7 +1167,7 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
>  			dma_fence_put(bo->moving);
>  
>  		return dma_resv_get_singleton(bo->base.resv,
> -					      DMA_RESV_USAGE_WRITE,
> +					      DMA_RESV_USAGE_KERNEL,

Same here.
>  					      &bo->moving);
>  	}
>  
> diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
> index f9901d273b8e..fce80a4a5147 100644
> --- a/drivers/infiniband/core/umem_dmabuf.c
> +++ b/drivers/infiniband/core/umem_dmabuf.c
> @@ -68,7 +68,7 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
>  	 * the migration.
>  	 */
>  	return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
> -				     DMA_RESV_USAGE_WRITE,
> +				     DMA_RESV_USAGE_KERNEL,
>  				     false, MAX_SCHEDULE_TIMEOUT);
>  }
>  EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index ae0436d7e7b8..8f4f406a5d02 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -54,8 +54,30 @@ struct dma_resv_list;
>   *
>   * This enum describes the different use cases for a dma_resv object and
>   * controls which fences are returned when queried.
> + *
> + * An important fact is that there is the order KERNEL<WRITE<READ and
> + * when the dma_resv object is asked for fences for one use case the fences
> + * for the lower use case are returned as well.
> + *
> + * For example when asking for WRITE fences then the KERNEL fences are returned
> + * as well. Similar when asked for READ fences then both WRITE and KERNEL
> + * fences are returned as well.

Ah, here's the important bit, but there are still a few missing links in
other places that need to highlight this, I think (like in other functions
and iterators).
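
Quick example of that ordering with the iterator (sketch, assuming a
locked struct dma_resv *resv):

struct dma_resv_iter cursor;
struct dma_fence *fence;

/* KERNEL < WRITE < READ: asking for WRITE returns the KERNEL fences as
 * well, but not the READ-only ones.
 */
dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_WRITE, fence)
	dma_fence_wait(fence, false);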

>   */
>  enum dma_resv_usage {
> +	/**
> +	 * @DMA_RESV_USAGE_KERNEL: For in kernel memory management only.
> +	 *
> +	 * This should only be used for things like copying or clearing memory
> +	 * with a DMA hardware engine for the purpose of kernel memory
> +	 * management.
> +	 *
> +	 * Drivers *always* must wait for those fences before accessing the
> +	 * resource protected by the dma_resv object. The only exception for
> +	 * that is when the resource is known to be locked down in place by
> +	 * pinning it previously.
> +	 */
> +	DMA_RESV_USAGE_KERNEL,
> +
>  	/**
>  	 * @DMA_RESV_USAGE_WRITE: Implicit write synchronization.
>  	 *

Cheers, Daniel

> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 21/23] dma-buf: wait for map to complete for static attachments
  2022-03-21 13:58 ` [PATCH 21/23] dma-buf: wait for map to complete for static attachments Christian König
@ 2022-03-29 15:56   ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 15:56 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:54PM +0100, Christian König wrote:
> We have previously done that in the individual drivers but it is
> more defensive to move that into the common code.
> 
> Dynamic attachments should wait for map operations to complete by themselves.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

You could perhaps ditch the call to i915_gem_object_wait_migration() in
the i915 dma-buf code too with this, but that entire thing is a bit of a mess
and totally different. So I guess we'll leave that.
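
For the dynamic case the equivalent wait stays with the importer, roughly
(hypothetical importer helper, not part of the patch):

/* Dynamic attachments are not covered by the new wait in __map_dma_buf(),
 * so the importer must wait for the kernel fences (e.g. a pending buffer
 * move) itself before touching the mapping.
 */
static long foo_importer_sync(struct dma_buf_attachment *attach)
{
	return dma_resv_wait_timeout(attach->dmabuf->resv,
				     DMA_RESV_USAGE_KERNEL, false,
				     MAX_SCHEDULE_TIMEOUT);
}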

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/dma-buf/dma-buf.c                   | 18 +++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 14 +-------------
>  drivers/gpu/drm/nouveau/nouveau_prime.c     | 17 +----------------
>  drivers/gpu/drm/radeon/radeon_prime.c       | 16 +++-------------
>  4 files changed, 20 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 528983d3ba64..d3dd602c4753 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -660,12 +660,24 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach,
>  				       enum dma_data_direction direction)
>  {
>  	struct sg_table *sg_table;
> +	signed long ret;
>  
>  	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> +	if (IS_ERR_OR_NULL(sg_table))
> +		return sg_table;
> +
> +	if (!dma_buf_attachment_is_dynamic(attach)) {
> +		ret = dma_resv_wait_timeout(attach->dmabuf->resv,
> +					    DMA_RESV_USAGE_KERNEL, true,
> +					    MAX_SCHEDULE_TIMEOUT);
> +		if (ret < 0) {
> +			attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> +							   direction);
> +			return ERR_PTR(ret);
> +		}
> +	}
>  
> -	if (!IS_ERR_OR_NULL(sg_table))
> -		mangle_sg_table(sg_table);
> -
> +	mangle_sg_table(sg_table);
>  	return sg_table;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index 579adfafe4d0..782cbca37538 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
>  {
>  	struct drm_gem_object *obj = attach->dmabuf->priv;
>  	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
> -	int r;
>  
>  	/* pin buffer into GTT */
> -	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
> -	if (r)
> -		return r;
> -
> -	if (bo->tbo.moving) {
> -		r = dma_fence_wait(bo->tbo.moving, true);
> -		if (r) {
> -			amdgpu_bo_unpin(bo);
> -			return r;
> -		}
> -	}
> -	return 0;
> +	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
> index 60019d0532fc..347488685f74 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_prime.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
> @@ -93,22 +93,7 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj)
>  	if (ret)
>  		return -EINVAL;
>  
> -	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
> -	if (ret)
> -		goto error;
> -
> -	if (nvbo->bo.moving)
> -		ret = dma_fence_wait(nvbo->bo.moving, true);
> -
> -	ttm_bo_unreserve(&nvbo->bo);
> -	if (ret)
> -		goto error;
> -
> -	return ret;
> -
> -error:
> -	nouveau_bo_unpin(nvbo);
> -	return ret;
> +	return 0;
>  }
>  
>  void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
> diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
> index 4a90807351e7..42a87948e28c 100644
> --- a/drivers/gpu/drm/radeon/radeon_prime.c
> +++ b/drivers/gpu/drm/radeon/radeon_prime.c
> @@ -77,19 +77,9 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
>  
>  	/* pin buffer into GTT */
>  	ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
> -	if (unlikely(ret))
> -		goto error;
> -
> -	if (bo->tbo.moving) {
> -		ret = dma_fence_wait(bo->tbo.moving, false);
> -		if (unlikely(ret)) {
> -			radeon_bo_unpin(bo);
> -			goto error;
> -		}
> -	}
> -
> -	bo->prime_shared_count++;
> -error:
> +	if (likely(ret == 0))
> +		bo->prime_shared_count++;
> +
>  	radeon_bo_unreserve(bo);
>  	return ret;
>  }
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 22/23] drm/i915: drop bo->moving dependency
  2022-03-21 13:58   ` [Intel-gfx] " Christian König
@ 2022-03-29 15:57     ` Daniel Vetter
  -1 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 15:57 UTC (permalink / raw)
  To: Christian König
  Cc: daniel.vetter, intel-gfx, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:55PM +0100, Christian König wrote:
> That should now be handled by the common dma_resv framework.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Cc: intel-gfx@lists.freedesktop.org

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object.c   | 29 ++++++--------------
>  drivers/gpu/drm/i915/gem/i915_gem_object.h   |  5 ++--
>  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +---------
>  drivers/gpu/drm/i915/i915_vma.c              |  9 +++++-
>  4 files changed, 19 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> index d87b508b59b1..fd240435ffef 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
> @@ -742,18 +742,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
>  /**
>   * i915_gem_object_get_moving_fence - Get the object's moving fence if any
>   * @obj: The object whose moving fence to get.
> + * @fence: The resulting fence
>   *
>   * A non-signaled moving fence means that there is an async operation
>   * pending on the object that needs to be waited on before setting up
>   * any GPU- or CPU PTEs to the object's pages.
>   *
> - * Return: A refcounted pointer to the object's moving fence if any,
> - * NULL otherwise.
> + * Return: Negative error code or 0 for success.
>   */
> -struct dma_fence *
> -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
> +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
> +				     struct dma_fence **fence)
>  {
> -	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
> +	return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
> +				      fence);
>  }
>  
>  /**
> @@ -771,23 +772,9 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
>  int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
>  				      bool intr)
>  {
> -	struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
> -	int ret;
> -
>  	assert_object_held(obj);
> -	if (!fence)
> -		return 0;
> -
> -	ret = dma_fence_wait(fence, intr);
> -	if (ret)
> -		return ret;
> -
> -	if (fence->error)
> -		return fence->error;
> -
> -	i915_gem_to_ttm(obj)->moving = NULL;
> -	dma_fence_put(fence);
> -	return 0;
> +	return dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_KERNEL,
> +				     intr, MAX_SCHEDULE_TIMEOUT);
>  }
>  
>  #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> index f66d46882ea7..be57af8bfb31 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
> @@ -521,9 +521,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
>  	i915_gem_object_unpin_pages(obj);
>  }
>  
> -struct dma_fence *
> -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
> -
> +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
> +				     struct dma_fence **fence);
>  int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
>  				      bool intr);
>  
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> index e4a232e22f9d..4d5d0cd64f23 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> @@ -452,19 +452,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo,
>  	return fence;
>  }
>  
> -static int
> -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
> -	  struct i915_deps *deps)
> -{
> -	int ret;
> -
> -	ret = i915_deps_add_dependency(deps, bo->moving, ctx);
> -	if (!ret)
> -		ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
> -
> -	return ret;
> -}
> -
>  /**
>   * i915_ttm_move - The TTM move callback used by i915.
>   * @bo: The buffer object.
> @@ -519,7 +506,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
>  		struct i915_deps deps;
>  
>  		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
> -		ret = prev_deps(bo, ctx, &deps);
> +		ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
>  		if (ret) {
>  			i915_refct_sgt_put(dst_rsgt);
>  			return ret;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 52fd6705a518..8737159f4706 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1247,10 +1247,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
>  	if (err)
>  		return err;
>  
> +	if (vma->obj) {
> +		err = i915_gem_object_get_moving_fence(vma->obj, &moving);
> +		if (err)
> +			return err;
> +	} else {
> +		moving = NULL;
> +	}
> +
>  	if (flags & PIN_GLOBAL)
>  		wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
>  
> -	moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
>  	if (flags & vma->vm->bind_async_flags || moving) {
>  		/* lock VM */
>  		err = i915_vm_lock_objects(vma->vm, ww);
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 23/23] drm/ttm: remove bo->moving
  2022-03-21 13:58 ` [PATCH 23/23] drm/ttm: remove bo->moving Christian König
@ 2022-03-29 16:02   ` Daniel Vetter
  2022-04-05  9:25     ` Christian König
  0 siblings, 1 reply; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 16:02 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:56PM +0100, Christian König wrote:
> This is now handled by the DMA-buf framework in the dma_resv obj.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 13 ++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  7 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c    | 11 +++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c   | 11 ++++--
>  drivers/gpu/drm/ttm/ttm_bo.c                  | 10 ++----
>  drivers/gpu/drm/ttm/ttm_bo_util.c             |  7 ----
>  drivers/gpu/drm/ttm/ttm_bo_vm.c               | 34 +++++++------------
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c      |  6 ----
>  include/drm/ttm/ttm_bo_api.h                  |  2 --
>  9 files changed, 40 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b461c3aab877..fe168b3cc3f2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2406,6 +2406,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>  		struct amdgpu_bo *bo = mem->bo;
>  		uint32_t domain = mem->domain;
>  		struct kfd_mem_attachment *attachment;
> +		struct dma_resv_iter cursor;
> +		struct dma_fence *fence;
>  
>  		total_size += amdgpu_bo_size(bo);
>  
> @@ -2420,10 +2422,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>  				goto validate_map_fail;
>  			}
>  		}
> -		ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
> -		if (ret) {
> -			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
> -			goto validate_map_fail;
> +		dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
> +					DMA_RESV_USAGE_KERNEL, fence) {
> +			ret = amdgpu_sync_fence(&sync_obj, fence);
> +			if (ret) {
> +				pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
> +				goto validate_map_fail;
> +			}
>  		}
>  		list_for_each_entry(attachment, &mem->attachments, list) {
>  			if (!attachment->is_mapped)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 1618b6847c69..887fa3f4284e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -609,9 +609,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
>  		if (unlikely(r))
>  			goto fail_unreserve;
>  
> -		amdgpu_bo_fence(bo, fence, false);
> -		dma_fence_put(bo->tbo.moving);
> -		bo->tbo.moving = dma_fence_get(fence);
> +		dma_resv_add_fence(bo->tbo.base.resv, fence,
> +				   DMA_RESV_USAGE_KERNEL);
>  		dma_fence_put(fence);
>  	}
>  	if (!bp->resv)
> @@ -1307,7 +1306,7 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>  
>  	r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence);
>  	if (!WARN_ON(r)) {
> -		amdgpu_bo_fence(abo, fence, false);
> +		dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
>  		dma_fence_put(fence);
>  	}
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> index e3fbf0f10add..31913ae86de6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
> @@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
>  {
>  	unsigned int i;
>  	uint64_t value;
> -	int r;
> +	long r;
>  
> -	if (vmbo->bo.tbo.moving) {
> -		r = dma_fence_wait(vmbo->bo.tbo.moving, true);
> -		if (r)
> -			return r;
> -	}
> +	r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
> +				  true, MAX_SCHEDULE_TIMEOUT);
> +	if (r < 0)
> +		return r;
>  
>  	pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> index dbb551762805..bdb44cee19d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
> @@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
>  	struct amdgpu_bo *bo = &vmbo->bo;
>  	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
>  		: AMDGPU_IB_POOL_DELAYED;
> +	struct dma_resv_iter cursor;
>  	unsigned int i, ndw, nptes;
> +	struct dma_fence *fence;
>  	uint64_t *pte;
>  	int r;
>  
>  	/* Wait for PD/PT moves to be completed */
> -	r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
> -	if (r)
> -		return r;
> +	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
> +				DMA_RESV_USAGE_KERNEL, fence) {
> +		r = amdgpu_sync_fence(&p->job->sync, fence);

Just a bikeshed, but I think an amdgpu_sync_resv(resv, usage) helper would
be neat.

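Something roughly like this, purely as a sketch (the name and exact
signature are made up here, not taken from the patch; the caller would
need to hold the reservation lock):

static int amdgpu_sync_resv_usage(struct amdgpu_sync *sync,
				  struct dma_resv *resv,
				  enum dma_resv_usage usage)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int r;

	/* Pull every fence matching @usage into the sync object. */
	dma_resv_for_each_fence(&cursor, resv, usage, fence) {
		r = amdgpu_sync_fence(sync, fence);
		if (r)
			return r;
	}
	return 0;
}
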
> +		if (r)
> +			return r;
> +	}
>  
>  	do {
>  		ndw = p->num_dw_left;
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 8fea9f88d118..9bce692075da 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -468,7 +468,6 @@ static void ttm_bo_release(struct kref *kref)
>  	dma_resv_unlock(bo->base.resv);
>  
>  	atomic_dec(&ttm_glob.bo_count);
> -	dma_fence_put(bo->moving);
>  	bo->destroy(bo);
>  }
>  
> @@ -737,9 +736,8 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
>  }
>  
>  /*
> - * Add the last move fence to the BO and reserve a new shared slot. We only use
> - * a shared slot to avoid unecessary sync and rely on the subsequent bo move to
> - * either stall or use an exclusive fence respectively set bo->moving.
> + * Add the last move fence to the BO as kernel dependency and reserve a new
> + * fence slot.
>   */
>  static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>  				 struct ttm_resource_manager *man,
> @@ -769,9 +767,6 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>  		dma_fence_put(fence);
>  		return ret;
>  	}
> -
> -	dma_fence_put(bo->moving);
> -	bo->moving = fence;
>  	return 0;
>  }
>  
> @@ -978,7 +973,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev,
>  	bo->bdev = bdev;
>  	bo->type = type;
>  	bo->page_alignment = page_alignment;
> -	bo->moving = NULL;
>  	bo->pin_count = 0;
>  	bo->sg = sg;
>  	if (resv) {
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 98e1c804519e..a2e3a9626198 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -229,7 +229,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
>  	atomic_inc(&ttm_glob.bo_count);
>  	INIT_LIST_HEAD(&fbo->base.ddestroy);
>  	INIT_LIST_HEAD(&fbo->base.lru);
> -	fbo->base.moving = NULL;
>  	drm_vma_node_reset(&fbo->base.base.vma_node);
>  
>  	kref_init(&fbo->base.kref);
> @@ -501,9 +500,6 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
>  	 * operation has completed.
>  	 */
>  
> -	dma_fence_put(bo->moving);
> -	bo->moving = dma_fence_get(fence);
> -
>  	ret = ttm_buffer_object_transfer(bo, &ghost_obj);
>  	if (ret)
>  		return ret;
> @@ -547,9 +543,6 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo,
>  	spin_unlock(&from->move_lock);
>  
>  	ttm_resource_free(bo, &bo->resource);
> -
> -	dma_fence_put(bo->moving);
> -	bo->moving = dma_fence_get(fence);
>  }
>  
>  int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 08ba083a80d2..5b324f245265 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -46,17 +46,13 @@
>  static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
>  				struct vm_fault *vmf)
>  {
> -	vm_fault_t ret = 0;
> -	int err = 0;
> -
> -	if (likely(!bo->moving))
> -		goto out_unlock;
> +	long err = 0;
>  
>  	/*
>  	 * Quick non-stalling check for idle.
>  	 */
> -	if (dma_fence_is_signaled(bo->moving))
> -		goto out_clear;
> +	if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL))
> +		return 0;
>  
>  	/*
>  	 * If possible, avoid waiting for GPU with mmap_lock
> @@ -64,34 +60,30 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
>  	 * is the first attempt.
>  	 */
>  	if (fault_flag_allow_retry_first(vmf->flags)) {
> -		ret = VM_FAULT_RETRY;
>  		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
> -			goto out_unlock;
> +			return VM_FAULT_RETRY;
>  
>  		ttm_bo_get(bo);
>  		mmap_read_unlock(vmf->vma->vm_mm);
> -		(void) dma_fence_wait(bo->moving, true);
> +		(void)dma_resv_wait_timeout(bo->base.resv,
> +					    DMA_RESV_USAGE_KERNEL, true,
> +					    MAX_SCHEDULE_TIMEOUT);
>  		dma_resv_unlock(bo->base.resv);
>  		ttm_bo_put(bo);
> -		goto out_unlock;
> +		return VM_FAULT_RETRY;
>  	}
>  
>  	/*
>  	 * Ordinary wait.
>  	 */
> -	err = dma_fence_wait(bo->moving, true);
> -	if (unlikely(err != 0)) {
> -		ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
> +	err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true,
> +				    MAX_SCHEDULE_TIMEOUT);
> +	if (unlikely(err < 0)) {
> +		return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
>  			VM_FAULT_NOPAGE;
> -		goto out_unlock;
>  	}
>  
> -out_clear:
> -	dma_fence_put(bo->moving);
> -	bo->moving = NULL;
> -
> -out_unlock:
> -	return ret;
> +	return 0;
>  }
>  
>  static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index f999fdd927df..c6d02c98a19a 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -1163,12 +1163,6 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
>  		*num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
>  						      PAGE_SIZE);
>  		vmw_bo_fence_single(bo, NULL);
> -		if (bo->moving)
> -			dma_fence_put(bo->moving);
> -
> -		return dma_resv_get_singleton(bo->base.resv,
> -					      DMA_RESV_USAGE_KERNEL,
> -					      &bo->moving);

This seems to be entirely misplaced and I'm pretty sure it doesn't even
compile in the interim.

>  	}
>  
>  	return 0;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index c17b2df9178b..4c7134550262 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -97,7 +97,6 @@ struct ttm_tt;
>   * @lru: List head for the lru list.
>   * @ddestroy: List head for the delayed destroy list.
>   * @swap: List head for swap LRU list.
> - * @moving: Fence set when BO is moving
>   * @offset: The current GPU offset, which can have different meanings
>   * depending on the memory type. For SYSTEM type memory, it should be 0.
>   * @cur_placement: Hint of current placement.
> @@ -150,7 +149,6 @@ struct ttm_buffer_object {
>  	 * Members protected by a bo reservation.
>  	 */
>  
> -	struct dma_fence *moving;
>  	unsigned priority;
>  	unsigned pin_count;

Aside from the vmwgfx thing this looks good. With the vmwgfx patch split
issue (I think it's just that) fixed:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

>  
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 20/23] dma-buf: add DMA_RESV_USAGE_BOOKKEEP v2
  2022-03-21 13:58 ` [PATCH 20/23] dma-buf: add DMA_RESV_USAGE_BOOKKEEP v2 Christian König
@ 2022-03-29 16:06   ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-29 16:06 UTC (permalink / raw)
  To: Christian König; +Cc: daniel.vetter, Christian König, dri-devel

On Mon, Mar 21, 2022 at 02:58:53PM +0100, Christian König wrote:
> Add a usage for submissions independent of implicit sync but still
> interesting for memory management.

This fails to mention that you're hiding the amdgpu vm optimization in
here. I'd split that out, but it really needs to be in the commit message.


> v2: cleanup the kerneldoc a bit
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-resv.c                       |  2 +-
>  drivers/dma-buf/st-dma-resv.c                    |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c          |  4 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c           |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c         |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           |  6 +++---
>  drivers/gpu/drm/i915/gem/i915_gem_lmem.c         |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_userptr.c      |  2 +-
>  drivers/gpu/drm/qxl/qxl_debugfs.c                |  2 +-
>  drivers/gpu/drm/radeon/radeon_gem.c              |  2 +-
>  drivers/gpu/drm/radeon/radeon_mn.c               |  2 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                     | 14 +++++++-------
>  include/linux/dma-resv.h                         | 13 ++++++++++++-
>  15 files changed, 35 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 10d70812373c..e05be4b52221 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -544,7 +544,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>  
>  	list = NULL;
>  
> -	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
> +	dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_BOOKKEEP);
>  	dma_resv_for_each_fence_unlocked(&cursor, f) {
>  
>  		if (dma_resv_iter_is_restarted(&cursor)) {
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index 062b57d63fa6..8ace9e84c845 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -296,7 +296,7 @@ int dma_resv(void)
>  	int r;
>  
>  	spin_lock_init(&fence_lock);
> -	for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ;
> +	for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_BOOKKEEP;
>  	     ++usage) {
>  		r = subtests(tests, (void *)(unsigned long)usage);
>  		if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index ab5d6b630a49..b461c3aab877 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>  	 */
>  	replacement = dma_fence_get_stub();
>  	dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
> -				replacement, DMA_RESV_USAGE_READ);
> +				replacement, DMA_RESV_USAGE_BOOKKEEP);
>  	dma_fence_put(replacement);
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 490d2a7a3e2b..ddf46802b1ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  	struct dma_fence *fence;
>  	int r;
>  
> -	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence);
> +	r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
>  	if (r)
>  		goto fallback;
>  
> @@ -139,7 +139,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>  	/* Not enough memory for the delayed delete, as last resort
>  	 * block for all the fences to complete.
>  	 */
> -	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
> +	dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
>  			      false, MAX_SCHEDULE_TIMEOUT);
>  	amdgpu_pasid_free(pasid);
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 86f5248676b0..b86c0b8252a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -75,7 +75,7 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
>  
>  	mmu_interval_set_seq(mni, cur_seq);
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
>  				  false, MAX_SCHEDULE_TIMEOUT);
>  	mutex_unlock(&adev->notifier_lock);
>  	if (r <= 0)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 744e144e5fc2..11c46b3e4c60 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -260,7 +260,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>  		return -EINVAL;
>  
>  	/* TODO: Use DMA_RESV_USAGE_READ here */
> -	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
> +	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
>  		dma_fence_chain_for_each(f, f) {
>  			struct dma_fence *tmp = dma_fence_chain_contained(f);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 9ffd8c4c34a0..63d8569ebef3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1357,7 +1357,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>  	 * be resident to run successfully
>  	 */
>  	dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
> -				DMA_RESV_USAGE_READ, f) {
> +				DMA_RESV_USAGE_BOOKKEEP, f) {
>  		if (amdkfd_fence_check_mm(f, current->mm))
>  			return false;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f3235aad7282..b4f0679f8797 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2105,7 +2105,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
> +	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
>  		/* Add a callback for each fence in the reservation object */
>  		amdgpu_vm_prt_get(adev);
>  		amdgpu_vm_add_prt_cb(adev, fence);
> @@ -2707,7 +2707,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>  		return true;
>  
>  	/* Don't evict VM page tables while they are busy */
> -	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ))
> +	if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
>  		return false;
>  
>  	/* Try to block ongoing updates */
> @@ -2888,7 +2888,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>  {
>  	timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
> -					DMA_RESV_USAGE_READ,
> +					DMA_RESV_USAGE_BOOKKEEP,
>  					true, timeout);
>  	if (timeout <= 0)
>  		return timeout;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> index a200d3e66573..4115a222a853 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
>  	struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
>  
>  #ifdef CONFIG_LOCKDEP
> -	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) &&
> +	GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) &&
>  		    i915_gem_object_evictable(obj));
>  #endif
>  	return mr && (mr->type == INTEL_MEMORY_LOCAL ||
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> index 60feff9160de..1ddd81623691 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> @@ -85,7 +85,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
>  		return true;
>  
>  	/* we will unbind on next submission, still have userptr pins */
> -	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false,
> +	r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
>  				  MAX_SCHEDULE_TIMEOUT);
>  	if (r <= 0)
>  		drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
> diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
> index 33e5889d6608..2d9ed3b94574 100644
> --- a/drivers/gpu/drm/qxl/qxl_debugfs.c
> +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
> @@ -62,7 +62,7 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
>  		int rel = 0;
>  
>  		dma_resv_iter_begin(&cursor, bo->tbo.base.resv,
> -				    DMA_RESV_USAGE_READ);
> +				    DMA_RESV_USAGE_BOOKKEEP);
>  		dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  			if (dma_resv_iter_is_restarted(&cursor))
>  				rel = 0;
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index 71bf9299e45c..9587ab88bedd 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -162,7 +162,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
>  	if (domain == RADEON_GEM_DOMAIN_CPU) {
>  		/* Asking for cpu access wait for object idle */
>  		r = dma_resv_wait_timeout(robj->tbo.base.resv,
> -					  DMA_RESV_USAGE_READ,
> +					  DMA_RESV_USAGE_BOOKKEEP,
>  					  true, 30 * HZ);
>  		if (!r)
>  			r = -EBUSY;
> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
> index 68ebeb1bdfff..29fe8423bd90 100644
> --- a/drivers/gpu/drm/radeon/radeon_mn.c
> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
> @@ -66,7 +66,7 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
>  		return true;
>  	}
>  
> -	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
> +	r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
>  				  false, MAX_SCHEDULE_TIMEOUT);
>  	if (r <= 0)
>  		DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 0f0aa96fb051..8fea9f88d118 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -272,7 +272,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
>  	struct dma_resv_iter cursor;
>  	struct dma_fence *fence;
>  
> -	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ);
> +	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
>  	dma_resv_for_each_fence_unlocked(&cursor, fence) {
>  		if (!fence->ops->signaled)
>  			dma_fence_enable_sw_signaling(fence);
> @@ -301,7 +301,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  	struct dma_resv *resv = &bo->base._resv;
>  	int ret;
>  
> -	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ))
> +	if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP))
>  		ret = 0;
>  	else
>  		ret = -EBUSY;
> @@ -313,7 +313,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>  			dma_resv_unlock(bo->base.resv);
>  		spin_unlock(&bo->bdev->lru_lock);
>  
> -		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
> +		lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
>  					     interruptible,
>  					     30 * HZ);
>  
> @@ -418,7 +418,7 @@ static void ttm_bo_release(struct kref *kref)
>  			 * fences block for the BO to become idle
>  			 */
>  			dma_resv_wait_timeout(bo->base.resv,
> -					      DMA_RESV_USAGE_READ, false,
> +					      DMA_RESV_USAGE_BOOKKEEP, false,
>  					      30 * HZ);
>  		}
>  
> @@ -429,7 +429,7 @@ static void ttm_bo_release(struct kref *kref)
>  		ttm_mem_io_free(bdev, bo->resource);
>  	}
>  
> -	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) ||
> +	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
>  	    !dma_resv_trylock(bo->base.resv)) {
>  		/* The BO is not idle, resurrect it for delayed destroy */
>  		ttm_bo_flush_all_fences(bo);
> @@ -1074,13 +1074,13 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>  	long timeout = 15 * HZ;
>  
>  	if (no_wait) {
> -		if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ))
> +		if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP))
>  			return 0;
>  		else
>  			return -EBUSY;
>  	}
>  
> -	timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
> +	timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
>  					interruptible, timeout);
>  	if (timeout < 0)
>  		return timeout;
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 8f4f406a5d02..f7b8ed0e40ad 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -55,7 +55,7 @@ struct dma_resv_list;
>   * This enum describes the different use cases for a dma_resv object and
>   * controls which fences are returned when queried.
>   *
> - * An important fact is that there is the order KERNEL<WRITE<READ and
> + * An important fact is that there is the order KERNEL<WRITE<READ<BOOKKEEP and
>   * when the dma_resv object is asked for fences for one use case the fences
>   * for the lower use case are returned as well.
>   *
> @@ -93,6 +93,17 @@ enum dma_resv_usage {
>  	 * an implicit read dependency.
>  	 */
>  	DMA_RESV_USAGE_READ,
> +
> +	/**
> +	 * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync.
> +	 *
> +	 * This should be used by submissions which don't want to participate in
> +	 * implicit synchronization.
> +	 *
> +	 * The most common cases are preemption fences as well as page table
> +	 * updates.

Maybe add here "... and their TLB flushes." Since we kinda screwed that up
a bit in i915 and scored a CVE for it :-)

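I.e. the last paragraph of the @DMA_RESV_USAGE_BOOKKEEP kerneldoc would
then read something like this (wording just a suggestion):

	/**
	 * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync.
	 * ...
	 * The most common cases are preemption fences as well as page table
	 * updates and their TLB flushes.
	 */
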
I haven't checked whether you've really caught them all, but I guess we
can do that once more when this has all landed.

With or without the bikesheds:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>


> +	 */
> +	DMA_RESV_USAGE_BOOKKEEP
>  };
>  
>  /**
> -- 
> 2.25.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3
  2022-03-28 17:14   ` Daniel Vetter
@ 2022-03-31 12:07     ` Christian König
  2022-03-31 16:42       ` Daniel Vetter
  0 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-03-31 12:07 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: daniel.vetter, Christian König, dri-devel

Am 28.03.22 um 19:14 schrieb Daniel Vetter:
> On Mon, Mar 21, 2022 at 02:58:45PM +0100, Christian König wrote:
>> [SNIP]
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index ea0cde4904f0..2f808decd8d9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -1384,6 +1384,14 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>>   		     bool shared)
>>   {
>>   	struct dma_resv *resv = bo->tbo.base.resv;
>> +	int r;
>> +
>> +	r = dma_resv_reserve_fences(resv, 1);
> This is quite a hack, but I did scroll through all the callers of
> amdgpu_bo_fence and I think it's fine - i.e. no recursion into the
> shrinker from a calling context where recursion into shrinker/memalloc
> isn't allowed.
>
> But it aint pretty :-/

Yeah, but one long term goal of this is to remove all the hacky handling 
of manually adding fences to the resv object using this function. I 
could add a TODO if that helps.

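Roughly like this, just to illustrate (sketch only, the OOM fallback in
the error path is an assumption and not part of the patch):

void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
		     bool shared)
{
	struct dma_resv *resv = bo->tbo.base.resv;
	int r;

	/* TODO: callers should reserve the fence slot themselves; reserving
	 * here is only an interim hack until the manual fence handling in
	 * amdgpu is cleaned up.
	 */
	r = dma_resv_reserve_fences(resv, 1);
	if (r) {
		/* As a last resort on OOM just block for the fence. */
		dma_fence_wait(fence, false);
		return;
	}

	if (shared)
		dma_resv_add_shared_fence(resv, fence);
	else
		dma_resv_add_excl_fence(resv, fence);
}
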
> [SNIP]
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
>> index ee9612a3ee5e..4de6500f3c55 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
>> @@ -596,7 +596,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
>>   	assert_object_held(src);
>>   	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
>>   
>> -	ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
>> +	ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);
> Can't we just reserve 2 slots instead of doing this 2x?

*handing you some coffee* We reserve one slot on the source and one 
on the destination buffer :)

> [SNIP]
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index a6925dbb6224..c34114560e49 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -247,6 +247,10 @@ static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
>   	int i, ret;
>   
>   	for (i = 0; i < bo_count; i++) {
> +		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
> +		if (ret)
> +			return ret;
> +
>   		/* panfrost always uses write mode in its current uapi */
>   		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
> I wonder whether we shouldn't move the dma-resv reserving into some shared
> helpers eventually ...

I was going back and forth adding this to 
drm_sched_job_add_implicit_dependencies(), but then decided against that 
because it is really two independent functionalities.

>> [SNIP]
>> @@ -120,9 +119,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
>>   			ret = ttm_bo_reserve_slowpath(bo, intr, ticket);
>>   		}
>>   
>> -		if (!ret && entry->num_shared)
>> -			ret = dma_resv_reserve_shared(bo->base.resv,
>> -								entry->num_shared);
>> +		if (!ret)
>> +			ret = dma_resv_reserve_fences(bo->base.resv,
>> +						      num_fences);
>>   
>>   		if (unlikely(ret != 0)) {
>>   			if (ticket) {
> I didn't find the corresponding reserve for the dma_resv_add_excl_fence()
> in ttm_bo_move_accel_cleanup(). Was that an oversight?

Mhm, need to double check as well. Could be that I missed that one.

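If it really is missing, the fix should just be something along these
lines right before the exclusive fence is added (hypothetical sketch,
the exact spot in ttm_bo_move_accel_cleanup() needs checking):

	ret = dma_resv_reserve_fences(bo->base.resv, 1);
	if (ret)
		return ret;

	dma_resv_add_excl_fence(bo->base.resv, fence);
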
>> [SNIP]
>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
>> index 4abf10b66fe8..594bd6bb00d2 100644
>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>> @@ -644,7 +644,7 @@ vc4_lock_bo_reservations(struct drm_device *dev,
>>   	for (i = 0; i < exec->bo_count; i++) {
>>   		bo = &exec->bo[i]->base;
>>   
>> -		ret = dma_resv_reserve_shared(bo->resv, 1);
>> +		ret = dma_resv_reserve_fences(bo->resv, 1);
>>   		if (ret) {
>>   			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
>>   			return ret;
> v3d and vc4 are missing in the conversion. I think for both you need to
> add it before the call to  like
> with etnaviv.

Both drivers already have the necessary calls. See 
vc4_lock_bo_reservations() and v3d_lock_bo_reservations().

>> [SNIP]
>> diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
>> index 48d3c9955f0d..1820ca6cf673 100644
>> --- a/drivers/gpu/drm/virtio/virtgpu_gem.c
>> +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
>> @@ -214,6 +214,7 @@ void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs,
>>   
>>   int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
>>   {
>> +	unsigned int i;
>>   	int ret;
>>   
>>   	if (objs->nents == 1) {
>> @@ -222,6 +223,14 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
>>   		ret = drm_gem_lock_reservations(objs->objs, objs->nents,
>>   						&objs->ticket);
>>   	}
>> +	if (ret)
>> +		return ret;
>> +
>> +	for (i = 0; i < objs->nents; ++i) {
>> +		ret = dma_resv_reserve_fences(objs->objs[i]->resv, 1);
> I think you could stuff this into the same loop, but also probably doesn't
> matter.

Na, that loop is inside drm_gem_lock_reservations().

> [SNIP]
>
> I found a few things, but with those (vc4 and v3d plus the ttm question,
> the other stuff is just comments) corrected this gets my

Going to double check the TTM case once more, but apart from that I 
think its solid.

> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thanks,
Christian.

>
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3
  2022-03-31 12:07     ` Christian König
@ 2022-03-31 16:42       ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-03-31 16:42 UTC (permalink / raw)
  To: Christian König; +Cc: Christian König, dri-devel

On Thu, 31 Mar 2022 at 14:07, Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 28.03.22 um 19:14 schrieb Daniel Vetter:
> > On Mon, Mar 21, 2022 at 02:58:45PM +0100, Christian König wrote:
> >> [SNIP]
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> index ea0cde4904f0..2f808decd8d9 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> @@ -1384,6 +1384,14 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
> >>                   bool shared)
> >>   {
> >>      struct dma_resv *resv = bo->tbo.base.resv;
> >> +    int r;
> >> +
> >> +    r = dma_resv_reserve_fences(resv, 1);
> > This is quite a hack, but I did scroll through all the callers of
> > amdgpu_bo_fence and I think it's fine - i.e. no recursion into the
> > shrinker from a calling context where recursion into shrinker/memalloc
> > isn't allowed.
> >
> > But it aint pretty :-/
>
> Yeah, but one long term goal of this is to remove all the hacky handling
> of manually adding fences to the resv object using this function. I
> could add a TODO if that helps.
>
> > [SNIP]
> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> >> index ee9612a3ee5e..4de6500f3c55 100644
> >> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> >> @@ -596,7 +596,11 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
> >>      assert_object_held(src);
> >>      i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
> >>
> >> -    ret = dma_resv_reserve_shared(src_bo->base.resv, 1);
> >> +    ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
> >> +    if (ret)
> >> +            return ret;
> >> +
> >> +    ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);
> > Can't we just reserve 2 slots instead of doing this 2x?
>
> *handing you some coffee* We reserve one slot on the source and one
> on the destination buffer :)

Ah, coffee, great :-)

> > [SNIP]
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index a6925dbb6224..c34114560e49 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -247,6 +247,10 @@ static int panfrost_acquire_object_fences(struct drm_gem_object **bos,
> >       int i, ret;
> >
> >       for (i = 0; i < bo_count; i++) {
> > +             ret = dma_resv_reserve_fences(bos[i]->resv, 1);
> > +             if (ret)
> > +                     return ret;
> > +
> >               /* panfrost always uses write mode in its current uapi */
> >               ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
> > I wonder whether we shouldn't move the dma-resv reserving into some shared
> > helpers eventually ...
>
> I was going back and forth adding this to
> drm_sched_job_add_implicit_dependencies(), but then decided against that
> because it is really two independent functionalities.

Yeah it doesn't really fit. Maybe together as a combo packet of ttm eu
helpers (lifted to gem_bo level) combined with drm_sched. Defo
something for another patch set.

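Very roughly something like this at the gem_bo level (purely
illustrative, all names made up):

int drm_gem_prepare_implicit_deps(struct drm_sched_job *job,
				  struct drm_gem_object **bos,
				  unsigned int bo_count, bool write)
{
	unsigned int i;
	int ret;

	for (i = 0; i < bo_count; i++) {
		/* Reserve the fence slot the driver fills in later ... */
		ret = dma_resv_reserve_fences(bos[i]->resv, 1);
		if (ret)
			return ret;

		/* ... and pull in the implicit dependencies for the job. */
		ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
							      write);
		if (ret)
			return ret;
	}
	return 0;
}
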
> >> [SNIP]
> >> @@ -120,9 +119,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
> >>                      ret = ttm_bo_reserve_slowpath(bo, intr, ticket);
> >>              }
> >>
> >> -            if (!ret && entry->num_shared)
> >> -                    ret = dma_resv_reserve_shared(bo->base.resv,
> >> -                                                            entry->num_shared);
> >> +            if (!ret)
> >> +                    ret = dma_resv_reserve_fences(bo->base.resv,
> >> +                                                  num_fences);
> >>
> >>              if (unlikely(ret != 0)) {
> >>                      if (ticket) {
> > I didn't find the corresponding reserve for the dma_resv_add_excl_fence()
> > in ttm_bo_move_accel_cleanup(). Was that an oversight?
>
> Mhm, need to double check as well. Could be that I missed that one.
>
> >> [SNIP]
> >> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> >> index 4abf10b66fe8..594bd6bb00d2 100644
> >> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> >> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> >> @@ -644,7 +644,7 @@ vc4_lock_bo_reservations(struct drm_device *dev,
> >>      for (i = 0; i < exec->bo_count; i++) {
> >>              bo = &exec->bo[i]->base;
> >>
> >> -            ret = dma_resv_reserve_shared(bo->resv, 1);
> >> +            ret = dma_resv_reserve_fences(bo->resv, 1);
> >>              if (ret) {
> >>                      vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
> >>                      return ret;
> > v3d and vc4 are missing in the conversion. I think for both you need to
> > add it before the call to  like
> > with etnaviv.
>
> Both drivers already have the necessary calls. See
> vc4_lock_bo_reservations() and v3d_lock_bo_reservations().

Indeed I missed that they unconditionally reserve slots and aren't
trying to be clever.

> >> [SNIP]
> >> diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
> >> index 48d3c9955f0d..1820ca6cf673 100644
> >> --- a/drivers/gpu/drm/virtio/virtgpu_gem.c
> >> +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
> >> @@ -214,6 +214,7 @@ void virtio_gpu_array_add_obj(struct virtio_gpu_object_array *objs,
> >>
> >>   int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
> >>   {
> >> +    unsigned int i;
> >>      int ret;
> >>
> >>      if (objs->nents == 1) {
> >> @@ -222,6 +223,14 @@ int virtio_gpu_array_lock_resv(struct virtio_gpu_object_array *objs)
> >>              ret = drm_gem_lock_reservations(objs->objs, objs->nents,
> >>                                              &objs->ticket);
> >>      }
> >> +    if (ret)
> >> +            return ret;
> >> +
> >> +    for (i = 0; i < objs->nents; ++i) {
> >> +            ret = dma_resv_reserve_fences(objs->objs[i]->resv, 1);
> > I think you could stuff this into the same loop, but also probably doesn't
> > matter.
>
> Na, that loop is inside drm_gem_lock_reservations().

Hm maybe another case for unified execbuf helpers that do this for drivers :-)

> > [SNIP]
> >
> > I found a few things, but with those (vc4 and v3d plus the ttm question,
> > the other stuff is just comments) corrected this gets my
>
> Going to double check the TTM case once more, but apart from that I
> think its solid.

Yeah, with ttm I'm just a bit too much out of my own depth, so if you
can reply with an explainer for dummies so I can check myself where
all the pieces are, then I think we have it all now!
-Daniel

>
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> Thanks,
> Christian.
>
> >
> >> --
> >> 2.25.1
> >>
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2
  2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
@ 2022-04-01  8:21   ` Christian König
  2022-04-01 15:57     ` Daniel Vetter
  0 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-04-01  8:21 UTC (permalink / raw)
  To: Christian König, daniel.vetter, dri-devel

Daniel any more comments on this one here?

It's the prerequisite to a bunch of other patches and I would like to
get it off my plate.

Thanks,
Christian.

Am 21.03.22 um 14:58 schrieb Christian König:
> Add a function to simplify getting a single fence for all the fences in
> the dma_resv object.
>
> v2: fix ref leak in error handling
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/dma-buf/dma-resv.c | 52 ++++++++++++++++++++++++++++++++++++++
>   include/linux/dma-resv.h   |  2 ++
>   2 files changed, 54 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 5001e9b4420a..c09fd8da0c85 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -34,6 +34,7 @@
>    */
>   
>   #include <linux/dma-resv.h>
> +#include <linux/dma-fence-array.h>
>   #include <linux/export.h>
>   #include <linux/mm.h>
>   #include <linux/sched/mm.h>
> @@ -650,6 +651,57 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write,
>   }
>   EXPORT_SYMBOL_GPL(dma_resv_get_fences);
>   
> +/**
> + * dma_resv_get_singleton - Get a single fence for all the fences
> + * @obj: the reservation object
> + * @write: true if we should return all fences
> + * @fence: the resulting fence
> + *
> + * Get a single fence representing all the fences inside the resv object.
> + * Returns either 0 for success or -ENOMEM.
> + *
> + * Warning: This can't be used like this when adding the fence back to the resv
> + * object since that can lead to stack corruption when finalizing the
> + * dma_fence_array.
> + */
> +int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> +			   struct dma_fence **fence)
> +{
> +	struct dma_fence_array *array;
> +	struct dma_fence **fences;
> +	unsigned count;
> +	int r;
> +
> +	r = dma_resv_get_fences(obj, write, &count, &fences);
> +	if (r)
> +		return r;
> +
> +	if (count == 0) {
> +		*fence = NULL;
> +		return 0;
> +	}
> +
> +	if (count == 1) {
> +		*fence = fences[0];
> +		kfree(fences);
> +		return 0;
> +	}
> +
> +	array = dma_fence_array_create(count, fences,
> +				       dma_fence_context_alloc(1),
> +				       1, false);
> +	if (!array) {
> +		while (count--)
> +			dma_fence_put(fences[count]);
> +		kfree(fences);
> +		return -ENOMEM;
> +	}
> +
> +	*fence = &array->base;
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
> +
>   /**
>    * dma_resv_wait_timeout - Wait on reservation's objects
>    * shared and/or exclusive fences.
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 202cc65d0621..08512c1e215d 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -449,6 +449,8 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
>   void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
>   int dma_resv_get_fences(struct dma_resv *obj, bool write,
>   			unsigned int *num_fences, struct dma_fence ***fences);
> +int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> +			   struct dma_fence **fence);
>   int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
>   long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
>   			   unsigned long timeout);

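For reference, the intended usage is as simple as this (hypothetical
caller, not part of the patch):

	struct dma_fence *fence;
	int r;

	r = dma_resv_get_singleton(bo->base.resv, true, &fence);
	if (r)
		return r;

	/* fence is NULL when the object is idle, otherwise it holds a
	 * reference to either the only fence or a dma_fence_array merging
	 * all of them.
	 */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
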

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5
  2022-03-29 15:43   ` Daniel Vetter
@ 2022-04-01 15:01     ` Christian König
  2022-04-01 16:16       ` Daniel Vetter
  0 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-04-01 15:01 UTC (permalink / raw)
  To: Daniel Vetter, Christian König; +Cc: daniel.vetter, dri-devel



Am 29.03.22 um 17:43 schrieb Daniel Vetter:
> On Mon, Mar 21, 2022 at 02:58:50PM +0100, Christian König wrote:
> [SNIP]
>>   /**
>> - * dma_resv_add_shared_fence - Add a fence to a shared slot
>> + * dma_resv_add_fence - Add a fence to the dma_resv obj
>>    * @obj: the reservation object
>> - * @fence: the shared fence to add
>> + * @fence: the fence to add
>> + * @usage: how the fence is used, see enum dma_resv_usage
>>    *
>> - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
>> + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
>>    * dma_resv_reserve_fences() has been called.
>>    *
>>    * See also &dma_resv.fence for a discussion of the semantics.
>>    */
>> -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>> +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
>> +			enum dma_resv_usage usage)
>>   {
>>   	struct dma_resv_list *fobj;
>>   	struct dma_fence *old;
>> @@ -274,44 +308,45 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>>   
>>   	dma_resv_assert_held(obj);
>>   
>> -	/* Drivers should not add containers here, instead add each fence
>> -	 * individually.
>> +	/* TODO: Drivers should not add containers here, instead add each fence
>> +	 * individually. Disabled for now until we cleaned up amdgpu/ttm.
>>   	 */
>> -	WARN_ON(dma_fence_is_container(fence));
>> +	/* WARN_ON(dma_fence_is_container(fence)); */
> Uh this looks like it's a misplaced hack?

Unfortunately not.

> If you do need it and cant get rid of it with patch reordering, then I
> think it needs to be split out for extra attention.

The problem is that I would need to squash removing the amdgpu 
workaround into this patch as well.

And I don't really want to make this patch more complicated than it
already is.

>> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
>> index 9435a3ca71c8..38caa7f78871 100644
>> --- a/drivers/gpu/drm/lima/lima_gem.c
>> +++ b/drivers/gpu/drm/lima/lima_gem.c
>> @@ -366,7 +366,7 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
>>   		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
>>   			dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence);
> Not very compile-tested it seems.

At least it used to compile fine once, but obviously I need to give it
another go.

> I think it'd be good to split this further:
>
> - Add dma_resv_add_fence, which just adds either an exclusive or shared
>    fences.
> - Convert drivers, cc driver authors (this patch doesn't seem to have
>    them).
>
> I think the above two could also be a single patch, but should work even
> more split.

That is easier said than done. I will see what I can do.
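
The driver side of the conversion is at least mostly mechanical; the pattern
(sketch only, using etnaviv-style names and mirroring the etnaviv/amdgpu
hunks) is

	/* before */
	if (write)
		dma_resv_add_excl_fence(obj->resv, fence);
	else
		dma_resv_add_shared_fence(obj->resv, fence);

	/* after */
	dma_resv_add_fence(obj->resv, fence, write ?
			   DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);

The hard part is picking the right usage for each call site, not the
mechanical replacement itself.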

The other documentation comments you had should be fixed in the next 
round, but you might want to take another full look at this.

Thanks,
Christian.


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2
  2022-04-01  8:21   ` Christian König
@ 2022-04-01 15:57     ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-04-01 15:57 UTC (permalink / raw)
  To: Christian König; +Cc: Christian König, dri-devel, daniel.vetter

On Fri, Apr 01, 2022 at 10:21:22AM +0200, Christian König wrote:
> Daniel any more comments on this one here?
> 
> It's the prerequisite to a bunch of other patches and I would like to get it
> out of the way.

Apologies for the miss, I thought I'd r-b stamped this one already.

> 
> Thanks,
> Christian.
> 
> On 21.03.22 at 14:58, Christian König wrote:
> > Add a function to simplify getting a single fence for all the fences in
> > the dma_resv object.
> > 
> > v2: fix ref leak in error handling
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > ---
> >   drivers/dma-buf/dma-resv.c | 52 ++++++++++++++++++++++++++++++++++++++
> >   include/linux/dma-resv.h   |  2 ++
> >   2 files changed, 54 insertions(+)
> > 
> > diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> > index 5001e9b4420a..c09fd8da0c85 100644
> > --- a/drivers/dma-buf/dma-resv.c
> > +++ b/drivers/dma-buf/dma-resv.c
> > @@ -34,6 +34,7 @@
> >    */
> >   #include <linux/dma-resv.h>
> > +#include <linux/dma-fence-array.h>
> >   #include <linux/export.h>
> >   #include <linux/mm.h>
> >   #include <linux/sched/mm.h>
> > @@ -650,6 +651,57 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write,
> >   }
> >   EXPORT_SYMBOL_GPL(dma_resv_get_fences);
> > +/**
> > + * dma_resv_get_singleton - Get a single fence for all the fences
> > + * @obj: the reservation object
> > + * @write: true if we should return all fences
> > + * @fence: the resulting fence
> > + *
> > + * Get a single fence representing all the fences inside the resv object.
> > + * Returns either 0 for success or -ENOMEM.
> > + *
> > + * Warning: This can't be used like this when adding the fence back to the resv
> > + * object since that can lead to stack corruption when finalizing the
> > + * dma_fence_array.

Please add the standard boilerplate here:

Returns 0 on success and negative error values on failure.

> > + */
> > +int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> > +			   struct dma_fence **fence)
> > +{
> > +	struct dma_fence_array *array;
> > +	struct dma_fence **fences;
> > +	unsigned count;
> > +	int r;
> > +
> > +	r = dma_resv_get_fences(obj, write, &count, &fences);

Deep down in here we already have the dma_resv_assert_held, so I'm happy.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> > +        if (r)
> > +		return r;
> > +
> > +	if (count == 0) {
> > +		*fence = NULL;
> > +		return 0;
> > +	}
> > +
> > +	if (count == 1) {
> > +		*fence = fences[0];
> > +		kfree(fences);
> > +		return 0;
> > +	}
> > +
> > +	array = dma_fence_array_create(count, fences,
> > +				       dma_fence_context_alloc(1),
> > +				       1, false);
> > +	if (!array) {
> > +		while (count--)
> > +			dma_fence_put(fences[count]);
> > +		kfree(fences);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	*fence = &array->base;
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
> > +
> >   /**
> >    * dma_resv_wait_timeout - Wait on reservation's objects
> >    * shared and/or exclusive fences.
> > diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> > index 202cc65d0621..08512c1e215d 100644
> > --- a/include/linux/dma-resv.h
> > +++ b/include/linux/dma-resv.h
> > @@ -449,6 +449,8 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> >   void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
> >   int dma_resv_get_fences(struct dma_resv *obj, bool write,
> >   			unsigned int *num_fences, struct dma_fence ***fences);
> > +int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> > +			   struct dma_fence **fence);
> >   int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
> >   long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
> >   			   unsigned long timeout);
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5
  2022-04-01 15:01     ` Christian König
@ 2022-04-01 16:16       ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-04-01 16:16 UTC (permalink / raw)
  To: Christian König; +Cc: Christian König, dri-devel, daniel.vetter

On Fri, Apr 01, 2022 at 05:01:13PM +0200, Christian König wrote:
> 
> 
> On 29.03.22 at 17:43, Daniel Vetter wrote:
> > On Mon, Mar 21, 2022 at 02:58:50PM +0100, Christian König wrote:
> > [SNIP]
> > >   /**
> > > - * dma_resv_add_shared_fence - Add a fence to a shared slot
> > > + * dma_resv_add_fence - Add a fence to the dma_resv obj
> > >    * @obj: the reservation object
> > > - * @fence: the shared fence to add
> > > + * @fence: the fence to add
> > > + * @usage: how the fence is used, see enum dma_resv_usage
> > >    *
> > > - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
> > > + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
> > >    * dma_resv_reserve_fences() has been called.
> > >    *
> > >    * See also &dma_resv.fence for a discussion of the semantics.
> > >    */
> > > -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
> > > +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
> > > +			enum dma_resv_usage usage)
> > >   {
> > >   	struct dma_resv_list *fobj;
> > >   	struct dma_fence *old;
> > > @@ -274,44 +308,45 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
> > >   	dma_resv_assert_held(obj);
> > > -	/* Drivers should not add containers here, instead add each fence
> > > -	 * individually.
> > > +	/* TODO: Drivers should not add containers here, instead add each fence
> > > +	 * individually. Disabled for now until we cleaned up amdgpu/ttm.
> > >   	 */
> > > -	WARN_ON(dma_fence_is_container(fence));
> > > +	/* WARN_ON(dma_fence_is_container(fence)); */
> > Uh this looks like it's a misplaced hack?
> 
> Unfortunately not.
> 
> > If you do need it and cant get rid of it with patch reordering, then I
> > think it needs to be split out for extra attention.
> 
> The problem is that I would need to squash removing the amdgpu workaround
> into this patch as well.
> 
> And I don't really want to make this patch more complicated than it already
> is.

Yeah I got it later on. Please explain the story in the commit message,
and how it'll be resolved. Otherwise this is a bit much wtf to merge :-)

> 
> > > diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> > > index 9435a3ca71c8..38caa7f78871 100644
> > > --- a/drivers/gpu/drm/lima/lima_gem.c
> > > +++ b/drivers/gpu/drm/lima/lima_gem.c
> > > @@ -366,7 +366,7 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
> > >   		if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
> > >   			dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence);
> > Not very compile-tested it seems.
> 
> At least it used to compile fine once, but obviously I need to give it another
> go.
> 
> > I think it'd be good to split this further:
> > 
> > - Add dma_resv_add_fence, which just adds either an exclusive or shared
> >    fences.
> > - Convert drivers, cc driver authors (this patch doesn't seem to have
> >    them).
> > 
> > I think the above two could also be a single patch, but should work even
> > more split.
> 
> That is easier said than done. I will see what I can do.
> 
> The other documentation comments you had should be fixed in the next round,
> but you might want to take another full look at this.

Yeah I get that it's an utter pain. I think if you add a list to the commit
message with a few comments on how each driver is touched and all that
(i.e. at least type up the separate commit messages for the separate
patches that should be split, but are a real pain to split), then I think
that's fine.

I've also done audit patches in the past which had that per-driver blurb
to cover all the cases, sometimes that's the least crappy way to do
things.

This also holds for the other patches which then add USAGE_KERNEL and
USAGE_BOOKKEEPING - splitting is a bit much, but at least having a
per-driver blurb of what/why you change would be really good to include I
think, just so we remember a bit more easily why things changed. I think then
we should be good here with these (well aside from the one ttm change that
I didn't follow yet in another patch).
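
For reference, the ordering the series relies on (as far as I can tell) is
KERNEL < WRITE < READ < BOOKKEEPING, and iterating with usage X is meant to
return every fence whose usage is <= X. Rough sketch, not taken from the
patch:

	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	/* returns kernel, write and read fences, skips bookkeeping ones */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
		/* inspect or wait on each fence here */
	}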
-Daniel

> 
> Thanks,
> Christian.
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5
  2022-03-21 13:58 ` [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5 Christian König
  2022-03-29 15:43   ` Daniel Vetter
@ 2022-04-02 22:16   ` Bas Nieuwenhuizen
  2022-04-03 17:48     ` Christian König
  1 sibling, 1 reply; 73+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-02 22:16 UTC (permalink / raw)
  To: Christian König; +Cc: Daniel Vetter, Christian König, ML dri-devel

On Mon, Mar 21, 2022 at 2:59 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Instead of distinguishing between shared and exclusive fences, specify
> the fence usage while adding fences.
>
> Rework all drivers to use this interface instead and deprecate the old one.
>
> v2: some kerneldoc comments suggested by Daniel
> v3: fix a missing case in radeon
> v4: rebase on nouveau changes, fix lockdep and temporary disable warning
> v5: more documentation updates
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-resv.c                    | 345 ++++++++----------
>  drivers/dma-buf/st-dma-resv.c                 | 101 ++---
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |   6 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  10 +-
>  drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  13 +-
>  drivers/gpu/drm/i915/gem/i915_gem_clflush.c   |   3 +-
>  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  |   5 +-
>  drivers/gpu/drm/i915/i915_vma.c               |   6 +-
>  drivers/gpu/drm/lima/lima_gem.c               |   2 +-
>  drivers/gpu/drm/msm/msm_gem_submit.c          |   2 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c          |   9 +-
>  drivers/gpu/drm/nouveau/nouveau_fence.c       |   4 +-
>  drivers/gpu/drm/qxl/qxl_release.c             |   3 +-
>  drivers/gpu/drm/radeon/radeon_object.c        |   6 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                  |   2 +-
>  drivers/gpu/drm/ttm/ttm_bo_util.c             |   5 +-
>  drivers/gpu/drm/ttm/ttm_execbuf_util.c        |   6 +-
>  drivers/gpu/drm/v3d/v3d_gem.c                 |   4 +-
>  drivers/gpu/drm/vc4/vc4_gem.c                 |   2 +-
>  drivers/gpu/drm/vgem/vgem_fence.c             |   9 +-
>  drivers/gpu/drm/virtio/virtgpu_gem.c          |   3 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |   3 +-
>  include/linux/dma-buf.h                       |  17 +-
>  include/linux/dma-resv.h                      |  72 ++--
>  26 files changed, 276 insertions(+), 370 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index bb7b023c2d33..26257ba1527e 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -44,12 +44,12 @@
>  /**
>   * DOC: Reservation Object Overview
>   *
> - * The reservation object provides a mechanism to manage shared and
> - * exclusive fences associated with a buffer.  A reservation object
> - * can have attached one exclusive fence (normally associated with
> - * write operations) or N shared fences (read operations).  The RCU
> - * mechanism is used to protect read access to fences from locked
> - * write-side updates.
> + * The reservation object provides a mechanism to manage a container of
> + * dma_fence objects associated with a resource. A reservation object
> + * can have any number of fences attached to it. Each fence carries a usage
> + * parameter determining how the operation represented by the fence is using the
> + * resource. The RCU mechanism is used to protect read access to fences from
> + * locked write-side updates.
>   *
>   * See struct dma_resv for more details.
>   */
> @@ -57,29 +57,74 @@
>  DEFINE_WD_CLASS(reservation_ww_class);
>  EXPORT_SYMBOL(reservation_ww_class);
>
> +/* Mask for the lower fence pointer bits */
> +#define DMA_RESV_LIST_MASK     0x3
> +
>  struct dma_resv_list {
>         struct rcu_head rcu;
> -       u32 shared_count, shared_max;
> -       struct dma_fence __rcu *shared[];
> +       u32 num_fences, max_fences;
> +       struct dma_fence __rcu *table[];
>  };
>
> +/**
> + * dma_resv_list_entry - extract fence and usage from a list entry
> + * @list: the list to extract and entry from
> + * @index: which entry we want
> + * @resv: optional dma_resv obj for lockdep check that the access is allowed
> + * @fence: the resulting fence
> + * @usage: the resulting usage
> + *
> + * Extract the fence and usage flags from an RCU protected entry in the list.
> + */
> +static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index,
> +                               struct dma_resv *resv, struct dma_fence **fence,
> +                               enum dma_resv_usage *usage)
> +{
> +       long tmp;
> +
> +       tmp = (long)rcu_dereference_check(list->table[index],
> +                                         resv ? dma_resv_held(resv) : true);
> +       *fence = (struct dma_fence *)(tmp & ~DMA_RESV_LIST_MASK);
> +       if (usage)
> +               *usage = tmp & DMA_RESV_LIST_MASK;
> +}
> +
> +/**
> + * dma_resv_list_set - set fence and usage at a specific index
> + * @list: the list to modify
> + * @index: where to make the change
> + * @fence: the fence to set
> + * @usage: the usage to set
> + *
> + * Set the fence and usage flags at the specific index in the list.
> + */
> +static void dma_resv_list_set(struct dma_resv_list *list,
> +                             unsigned int index,
> +                             struct dma_fence *fence,
> +                             enum dma_resv_usage usage)
> +{
> +       long tmp = ((long)fence) | usage;
> +
> +       RCU_INIT_POINTER(list->table[index], (struct dma_fence *)tmp);
> +}
> +
>  /**
>   * dma_resv_list_alloc - allocate fence list
> - * @shared_max: number of fences we need space for
> + * @max_fences: number of fences we need space for
>   *
>   * Allocate a new dma_resv_list and make sure to correctly initialize
> - * shared_max.
> + * max_fences.
>   */
> -static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max)
> +static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences)
>  {
>         struct dma_resv_list *list;
>
> -       list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL);
> +       list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL);
>         if (!list)
>                 return NULL;
>
> -       list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) /
> -               sizeof(*list->shared);
> +       list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) /
> +               sizeof(*list->table);
>
>         return list;
>  }
> @@ -97,9 +142,12 @@ static void dma_resv_list_free(struct dma_resv_list *list)
>         if (!list)
>                 return;
>
> -       for (i = 0; i < list->shared_count; ++i)
> -               dma_fence_put(rcu_dereference_protected(list->shared[i], true));
> +       for (i = 0; i < list->num_fences; ++i) {
> +               struct dma_fence *fence;
>
> +               dma_resv_list_entry(list, i, NULL, &fence, NULL);
> +               dma_fence_put(fence);
> +       }
>         kfree_rcu(list, rcu);
>  }
>
> @@ -112,8 +160,7 @@ void dma_resv_init(struct dma_resv *obj)
>         ww_mutex_init(&obj->lock, &reservation_ww_class);
>         seqcount_ww_mutex_init(&obj->seq, &obj->lock);
>
> -       RCU_INIT_POINTER(obj->fence, NULL);
> -       RCU_INIT_POINTER(obj->fence_excl, NULL);
> +       RCU_INIT_POINTER(obj->fences, NULL);
>  }
>  EXPORT_SYMBOL(dma_resv_init);
>
> @@ -123,46 +170,31 @@ EXPORT_SYMBOL(dma_resv_init);
>   */
>  void dma_resv_fini(struct dma_resv *obj)
>  {
> -       struct dma_resv_list *fobj;
> -       struct dma_fence *excl;
> -
>         /*
>          * This object should be dead and all references must have
>          * been released to it, so no need to be protected with rcu.
>          */
> -       excl = rcu_dereference_protected(obj->fence_excl, 1);
> -       if (excl)
> -               dma_fence_put(excl);
> -
> -       fobj = rcu_dereference_protected(obj->fence, 1);
> -       dma_resv_list_free(fobj);
> +       dma_resv_list_free(rcu_dereference_protected(obj->fences, true));
>         ww_mutex_destroy(&obj->lock);
>  }
>  EXPORT_SYMBOL(dma_resv_fini);
>
> -static inline struct dma_fence *
> -dma_resv_excl_fence(struct dma_resv *obj)
> +static inline struct dma_resv_list *dma_resv_fences_list(struct dma_resv *obj)
>  {
> -       return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
> -}
> -
> -static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
> -{
> -       return rcu_dereference_check(obj->fence, dma_resv_held(obj));
> +       return rcu_dereference_check(obj->fences, dma_resv_held(obj));
>  }
>
>  /**
> - * dma_resv_reserve_fences - Reserve space to add shared fences to
> - * a dma_resv.
> + * dma_resv_reserve_fences - Reserve space to add fences to a dma_resv object.
>   * @obj: reservation object
>   * @num_fences: number of fences we want to add
>   *
> - * Should be called before dma_resv_add_shared_fence().  Must
> - * be called with @obj locked through dma_resv_lock().
> + * Should be called before dma_resv_add_fence().  Must be called with @obj
> + * locked through dma_resv_lock().
>   *
>   * Note that the preallocated slots need to be re-reserved if @obj is unlocked
> - * at any time before calling dma_resv_add_shared_fence(). This is validated
> - * when CONFIG_DEBUG_MUTEXES is enabled.
> + * at any time before calling dma_resv_add_fence(). This is validated when
> + * CONFIG_DEBUG_MUTEXES is enabled.
>   *
>   * RETURNS
>   * Zero for success, or -errno
> @@ -174,11 +206,11 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>
>         dma_resv_assert_held(obj);
>
> -       old = dma_resv_shared_list(obj);
> -       if (old && old->shared_max) {
> -               if ((old->shared_count + num_fences) <= old->shared_max)
> +       old = dma_resv_fences_list(obj);
> +       if (old && old->max_fences) {
> +               if ((old->num_fences + num_fences) <= old->max_fences)
>                         return 0;
> -               max = max(old->shared_count + num_fences, old->shared_max * 2);
> +               max = max(old->num_fences + num_fences, old->max_fences * 2);
>         } else {
>                 max = max(4ul, roundup_pow_of_two(num_fences));
>         }
> @@ -193,27 +225,27 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>          * references from the old struct are carried over to
>          * the new.
>          */
> -       for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) {
> +       for (i = 0, j = 0, k = max; i < (old ? old->num_fences : 0); ++i) {
> +               enum dma_resv_usage usage;
>                 struct dma_fence *fence;
>
> -               fence = rcu_dereference_protected(old->shared[i],
> -                                                 dma_resv_held(obj));
> +               dma_resv_list_entry(old, i, obj, &fence, &usage);
>                 if (dma_fence_is_signaled(fence))
> -                       RCU_INIT_POINTER(new->shared[--k], fence);
> +                       RCU_INIT_POINTER(new->table[--k], fence);
>                 else
> -                       RCU_INIT_POINTER(new->shared[j++], fence);
> +                       dma_resv_list_set(new, j++, fence, usage);
>         }
> -       new->shared_count = j;
> +       new->num_fences = j;
>
>         /*
>          * We are not changing the effective set of fences here so can
>          * merely update the pointer to the new array; both existing
>          * readers and new readers will see exactly the same set of
> -        * active (unsignaled) shared fences. Individual fences and the
> +        * active (unsignaled) fences. Individual fences and the
>          * old array are protected by RCU and so will not vanish under
>          * the gaze of the rcu_read_lock() readers.
>          */
> -       rcu_assign_pointer(obj->fence, new);
> +       rcu_assign_pointer(obj->fences, new);
>
>         if (!old)
>                 return 0;
> @@ -222,7 +254,7 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
>         for (i = k; i < max; ++i) {
>                 struct dma_fence *fence;
>
> -               fence = rcu_dereference_protected(new->shared[i],
> +               fence = rcu_dereference_protected(new->table[i],
>                                                   dma_resv_held(obj));
>                 dma_fence_put(fence);
>         }
> @@ -234,37 +266,39 @@ EXPORT_SYMBOL(dma_resv_reserve_fences);
>
>  #ifdef CONFIG_DEBUG_MUTEXES
>  /**
> - * dma_resv_reset_shared_max - reset shared fences for debugging
> + * dma_resv_reset_max_fences - reset fences for debugging
>   * @obj: the dma_resv object to reset
>   *
> - * Reset the number of pre-reserved shared slots to test that drivers do
> + * Reset the number of pre-reserved fence slots to test that drivers do
>   * correct slot allocation using dma_resv_reserve_fences(). See also
> - * &dma_resv_list.shared_max.
> + * &dma_resv_list.max_fences.
>   */
> -void dma_resv_reset_shared_max(struct dma_resv *obj)
> +void dma_resv_reset_max_fences(struct dma_resv *obj)
>  {
> -       struct dma_resv_list *fences = dma_resv_shared_list(obj);
> +       struct dma_resv_list *fences = dma_resv_fences_list(obj);
>
>         dma_resv_assert_held(obj);
>
> -       /* Test shared fence slot reservation */
> +       /* Test fence slot reservation */
>         if (fences)
> -               fences->shared_max = fences->shared_count;
> +               fences->max_fences = fences->num_fences;
>  }
> -EXPORT_SYMBOL(dma_resv_reset_shared_max);
> +EXPORT_SYMBOL(dma_resv_reset_max_fences);
>  #endif
>
>  /**
> - * dma_resv_add_shared_fence - Add a fence to a shared slot
> + * dma_resv_add_fence - Add a fence to the dma_resv obj
>   * @obj: the reservation object
> - * @fence: the shared fence to add
> + * @fence: the fence to add
> + * @usage: how the fence is used, see enum dma_resv_usage
>   *
> - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and
> + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
>   * dma_resv_reserve_fences() has been called.
>   *
>   * See also &dma_resv.fence for a discussion of the semantics.
>   */
> -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
> +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
> +                       enum dma_resv_usage usage)
>  {
>         struct dma_resv_list *fobj;
>         struct dma_fence *old;
> @@ -274,44 +308,45 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence)
>
>         dma_resv_assert_held(obj);
>
> -       /* Drivers should not add containers here, instead add each fence
> -        * individually.
> +       /* TODO: Drivers should not add containers here, instead add each fence
> +        * individually. Disabled for now until we cleaned up amdgpu/ttm.
>          */
> -       WARN_ON(dma_fence_is_container(fence));
> +       /* WARN_ON(dma_fence_is_container(fence)); */
>
> -       fobj = dma_resv_shared_list(obj);
> -       count = fobj->shared_count;
> +       fobj = dma_resv_fences_list(obj);
> +       count = fobj->num_fences;
>
>         write_seqcount_begin(&obj->seq);
>
>         for (i = 0; i < count; ++i) {
> +               enum dma_resv_usage old_usage;
>
> -               old = rcu_dereference_protected(fobj->shared[i],
> -                                               dma_resv_held(obj));
> -               if (old->context == fence->context ||
> +               dma_resv_list_entry(fobj, i, obj, &old, &old_usage);
> +               if ((old->context == fence->context && old_usage >= usage) ||
>                     dma_fence_is_signaled(old))
>                         goto replace;
>         }
>
> -       BUG_ON(fobj->shared_count >= fobj->shared_max);
> +       BUG_ON(fobj->num_fences >= fobj->max_fences);
>         old = NULL;
>         count++;
>
>  replace:
> -       RCU_INIT_POINTER(fobj->shared[i], fence);
> -       /* pointer update must be visible before we extend the shared_count */
> -       smp_store_mb(fobj->shared_count, count);
> +       dma_resv_list_set(fobj, i, fence, usage);
> +       /* pointer update must be visible before we extend the num_fences */
> +       smp_store_mb(fobj->num_fences, count);
>
>         write_seqcount_end(&obj->seq);
>         dma_fence_put(old);
>  }
> -EXPORT_SYMBOL(dma_resv_add_shared_fence);
> +EXPORT_SYMBOL(dma_resv_add_fence);
>
>  /**
>   * dma_resv_replace_fences - replace fences in the dma_resv obj
>   * @obj: the reservation object
>   * @context: the context of the fences to replace
>   * @replacement: the new fence to use instead
> + * @usage: how the new fence is used, see enum dma_resv_usage
>   *
>   * Replace fences with a specified context with a new fence. Only valid if the
>   * operation represented by the original fence has no longer access to the
> @@ -321,107 +356,72 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence);
>   * update fence which makes the resource inaccessible.
>   */
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> -                            struct dma_fence *replacement)
> +                            struct dma_fence *replacement,
> +                            enum dma_resv_usage usage)
>  {
>         struct dma_resv_list *list;
> -       struct dma_fence *old;
>         unsigned int i;
>
>         dma_resv_assert_held(obj);
>
> +       list = dma_resv_fences_list(obj);
>         write_seqcount_begin(&obj->seq);
> +       for (i = 0; list && i < list->num_fences; ++i) {
> +               struct dma_fence *old;
>
> -       old = dma_resv_excl_fence(obj);
> -       if (old->context == context) {
> -               RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement));
> -               dma_fence_put(old);
> -       }
> -
> -       list = dma_resv_shared_list(obj);
> -       for (i = 0; list && i < list->shared_count; ++i) {
> -               old = rcu_dereference_protected(list->shared[i],
> -                                               dma_resv_held(obj));
> +               dma_resv_list_entry(list, i, obj, &old, NULL);
>                 if (old->context != context)
>                         continue;
>
> -               rcu_assign_pointer(list->shared[i], dma_fence_get(replacement));
> +               dma_resv_list_set(list, i, replacement, usage);
>                 dma_fence_put(old);
>         }
> -
>         write_seqcount_end(&obj->seq);
>  }
>  EXPORT_SYMBOL(dma_resv_replace_fences);
>
>  /**
> - * dma_resv_add_excl_fence - Add an exclusive fence.
> - * @obj: the reservation object
> - * @fence: the exclusive fence to add
> + * dma_resv_iter_restart_unlocked - restart the unlocked iterator
> + * @cursor: The dma_resv_iter object to restart
>   *
> - * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock().
> - * See also &dma_resv.fence_excl for a discussion of the semantics.
> + * Restart the unlocked iteration by initializing the cursor object.
>   */
> -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
> -{
> -       struct dma_fence *old_fence = dma_resv_excl_fence(obj);
> -
> -       dma_resv_assert_held(obj);
> -
> -       dma_fence_get(fence);
> -
> -       write_seqcount_begin(&obj->seq);
> -       /* write_seqcount_begin provides the necessary memory barrier */
> -       RCU_INIT_POINTER(obj->fence_excl, fence);
> -       write_seqcount_end(&obj->seq);
> -
> -       dma_fence_put(old_fence);
> -}
> -EXPORT_SYMBOL(dma_resv_add_excl_fence);
> -
> -/* Restart the iterator by initializing all the necessary fields, but not the
> - * relation to the dma_resv object. */
>  static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
>  {
>         cursor->seq = read_seqcount_begin(&cursor->obj->seq);
> -       cursor->index = -1;
> -       cursor->shared_count = 0;
> -       if (cursor->usage >= DMA_RESV_USAGE_READ) {
> -               cursor->fences = dma_resv_shared_list(cursor->obj);
> -               if (cursor->fences)
> -                       cursor->shared_count = cursor->fences->shared_count;
> -       } else {
> -               cursor->fences = NULL;
> -       }
> +       cursor->index = 0;
> +       cursor->num_fences = 0;
> +       cursor->fences = dma_resv_fences_list(cursor->obj);
> +       if (cursor->fences)
> +               cursor->num_fences = cursor->fences->num_fences;
>         cursor->is_restarted = true;
>  }
>
>  /* Walk to the next not signaled fence and grab a reference to it */
>  static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor)
>  {
> -       struct dma_resv *obj = cursor->obj;
> +       if (!cursor->fences)
> +               return;
>
>         do {
>                 /* Drop the reference from the previous round */
>                 dma_fence_put(cursor->fence);
>
> -               if (cursor->index == -1) {
> -                       cursor->fence = dma_resv_excl_fence(obj);
> -                       cursor->index++;
> -                       if (!cursor->fence)
> -                               continue;
> -
> -               } else if (!cursor->fences ||
> -                          cursor->index >= cursor->shared_count) {
> +               if (cursor->index >= cursor->num_fences) {
>                         cursor->fence = NULL;
>                         break;
>
> -               } else {
> -                       struct dma_resv_list *fences = cursor->fences;
> -                       unsigned int idx = cursor->index++;
> -
> -                       cursor->fence = rcu_dereference(fences->shared[idx]);
>                 }
> +
> +               dma_resv_list_entry(cursor->fences, cursor->index++,
> +                                   cursor->obj, &cursor->fence,
> +                                   &cursor->fence_usage);
>                 cursor->fence = dma_fence_get_rcu(cursor->fence);
> -               if (!cursor->fence || !dma_fence_is_signaled(cursor->fence))
> +               if (!cursor->fence)
> +                       break;
> +
> +               if (!dma_fence_is_signaled(cursor->fence) &&
> +                   cursor->usage >= cursor->fence_usage)
>                         break;
>         } while (true);
>  }
> @@ -496,15 +496,9 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
>         dma_resv_assert_held(cursor->obj);
>
>         cursor->index = 0;
> -       if (cursor->usage >= DMA_RESV_USAGE_READ)
> -               cursor->fences = dma_resv_shared_list(cursor->obj);
> -       else
> -               cursor->fences = NULL;
> -
> -       fence = dma_resv_excl_fence(cursor->obj);
> -       if (!fence)
> -               fence = dma_resv_iter_next(cursor);
> +       cursor->fences = dma_resv_fences_list(cursor->obj);
>
> +       fence = dma_resv_iter_next(cursor);
>         cursor->is_restarted = true;
>         return fence;
>  }
> @@ -519,17 +513,17 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first);
>   */
>  struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
>  {
> -       unsigned int idx;
> +       struct dma_fence *fence;
>
>         dma_resv_assert_held(cursor->obj);
>
>         cursor->is_restarted = false;
> -       if (!cursor->fences || cursor->index >= cursor->fences->shared_count)
> +       if (!cursor->fences || cursor->index >= cursor->fences->num_fences)
>                 return NULL;
>
> -       idx = cursor->index++;
> -       return rcu_dereference_protected(cursor->fences->shared[idx],
> -                                        dma_resv_held(cursor->obj));
> +       dma_resv_list_entry(cursor->fences, cursor->index++,
> +                           cursor->obj, &fence, &cursor->fence_usage);

Shouldn't we skip the current fence if cursor->fence_usage doesn't
match cursor->usage? (Similar to what is done in the unlocked
variant.)
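
Something along these lines in dma_resv_iter_next() would mirror the
unlocked walk (completely untested, just to illustrate what I mean):

	do {
		if (!cursor->fences ||
		    cursor->index >= cursor->fences->num_fences)
			return NULL;

		dma_resv_list_entry(cursor->fences, cursor->index++,
				    cursor->obj, &fence, &cursor->fence_usage);
	} while (cursor->fence_usage > cursor->usage);

	return fence;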


> +       return fence;
>  }
>  EXPORT_SYMBOL_GPL(dma_resv_iter_next);
>
> @@ -544,57 +538,43 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>  {
>         struct dma_resv_iter cursor;
>         struct dma_resv_list *list;
> -       struct dma_fence *f, *excl;
> +       struct dma_fence *f;
>
>         dma_resv_assert_held(dst);
>
>         list = NULL;
> -       excl = NULL;
>
>         dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
>         dma_resv_for_each_fence_unlocked(&cursor, f) {
>
>                 if (dma_resv_iter_is_restarted(&cursor)) {
>                         dma_resv_list_free(list);
> -                       dma_fence_put(excl);
> -
> -                       if (cursor.shared_count) {
> -                               list = dma_resv_list_alloc(cursor.shared_count);
> -                               if (!list) {
> -                                       dma_resv_iter_end(&cursor);
> -                                       return -ENOMEM;
> -                               }
>
> -                               list->shared_count = 0;
> -
> -                       } else {
> -                               list = NULL;
> +                       list = dma_resv_list_alloc(cursor.num_fences);
> +                       if (!list) {
> +                               dma_resv_iter_end(&cursor);
> +                               return -ENOMEM;
>                         }
> -                       excl = NULL;
> +                       list->num_fences = 0;
>                 }
>
>                 dma_fence_get(f);
> -               if (dma_resv_iter_is_exclusive(&cursor))
> -                       excl = f;
> -               else
> -                       RCU_INIT_POINTER(list->shared[list->shared_count++], f);
> +               dma_resv_list_set(list, list->num_fences++, f,
> +                                 dma_resv_iter_usage(&cursor));
>         }
>         dma_resv_iter_end(&cursor);
>
>         write_seqcount_begin(&dst->seq);
> -       excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst));
> -       list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst));
> +       list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst));
>         write_seqcount_end(&dst->seq);
>
>         dma_resv_list_free(list);
> -       dma_fence_put(excl);
> -
>         return 0;
>  }
>  EXPORT_SYMBOL(dma_resv_copy_fences);
>
>  /**
> - * dma_resv_get_fences - Get an object's shared and exclusive
> + * dma_resv_get_fences - Get an object's fences
>   * fences without update side lock held
>   * @obj: the reservation object
>   * @usage: controls which fences to include, see enum dma_resv_usage.
> @@ -623,7 +603,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>                         while (*num_fences)
>                                 dma_fence_put((*fences)[--(*num_fences)]);
>
> -                       count = cursor.shared_count + 1;
> +                       count = cursor.num_fences + 1;
>
>                         /* Eventually re-allocate the array */
>                         *fences = krealloc_array(*fences, count,
> @@ -695,8 +675,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
>  EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
>
>  /**
> - * dma_resv_wait_timeout - Wait on reservation's objects
> - * shared and/or exclusive fences.
> + * dma_resv_wait_timeout - Wait on reservation's objects fences
>   * @obj: the reservation object
>   * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @intr: if true, do interruptible wait
> @@ -769,13 +748,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
>   */
>  void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq)
>  {
> +       static const char *usage[] = { "kernel", "write", "read", "other" };
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
>
>         dma_resv_for_each_fence(&cursor, obj, true, fence) {
>                 seq_printf(seq, "\t%s fence:",
> -                          dma_resv_iter_is_exclusive(&cursor) ?
> -                               "Exclusive" : "Shared");
> +                          usage[dma_resv_iter_usage(&cursor)]);
>                 dma_fence_describe(fence, seq);
>         }
>  }
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index d097981061b1..d0f7c2bfd4f0 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -58,8 +58,9 @@ static int sanitycheck(void *arg)
>         return r;
>  }
>
> -static int test_signaling(void *arg, enum dma_resv_usage usage)
> +static int test_signaling(void *arg)
>  {
> +       enum dma_resv_usage usage = (unsigned long)arg;
>         struct dma_resv resv;
>         struct dma_fence *f;
>         int r;
> @@ -81,11 +82,7 @@ static int test_signaling(void *arg, enum dma_resv_usage usage)
>                 goto err_unlock;
>         }
>
> -       if (usage >= DMA_RESV_USAGE_READ)
> -               dma_resv_add_shared_fence(&resv, f);
> -       else
> -               dma_resv_add_excl_fence(&resv, f);
> -
> +       dma_resv_add_fence(&resv, f, usage);
>         if (dma_resv_test_signaled(&resv, usage)) {
>                 pr_err("Resv unexpectedly signaled\n");
>                 r = -EINVAL;
> @@ -105,18 +102,9 @@ static int test_signaling(void *arg, enum dma_resv_usage usage)
>         return r;
>  }
>
> -static int test_excl_signaling(void *arg)
> -{
> -       return test_signaling(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_signaling(void *arg)
> -{
> -       return test_signaling(arg, DMA_RESV_USAGE_READ);
> -}
> -
> -static int test_for_each(void *arg, enum dma_resv_usage usage)
> +static int test_for_each(void *arg)
>  {
> +       enum dma_resv_usage usage = (unsigned long)arg;
>         struct dma_resv_iter cursor;
>         struct dma_fence *f, *fence;
>         struct dma_resv resv;
> @@ -139,10 +127,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
>                 goto err_unlock;
>         }
>
> -       if (usage >= DMA_RESV_USAGE_READ)
> -               dma_resv_add_shared_fence(&resv, f);
> -       else
> -               dma_resv_add_excl_fence(&resv, f);
> +       dma_resv_add_fence(&resv, f, usage);
>
>         r = -ENOENT;
>         dma_resv_for_each_fence(&cursor, &resv, usage, fence) {
> @@ -156,8 +141,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
>                         r = -EINVAL;
>                         goto err_unlock;
>                 }
> -               if (dma_resv_iter_is_exclusive(&cursor) !=
> -                   (usage >= DMA_RESV_USAGE_READ)) {
> +               if (dma_resv_iter_usage(&cursor) != usage) {
>                         pr_err("Unexpected fence usage\n");
>                         r = -EINVAL;
>                         goto err_unlock;
> @@ -177,18 +161,9 @@ static int test_for_each(void *arg, enum dma_resv_usage usage)
>         return r;
>  }
>
> -static int test_excl_for_each(void *arg)
> -{
> -       return test_for_each(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_for_each(void *arg)
> -{
> -       return test_for_each(arg, DMA_RESV_USAGE_READ);
> -}
> -
> -static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
> +static int test_for_each_unlocked(void *arg)
>  {
> +       enum dma_resv_usage usage = (unsigned long)arg;
>         struct dma_resv_iter cursor;
>         struct dma_fence *f, *fence;
>         struct dma_resv resv;
> @@ -212,10 +187,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>                 goto err_free;
>         }
>
> -       if (usage >= DMA_RESV_USAGE_READ)
> -               dma_resv_add_shared_fence(&resv, f);
> -       else
> -               dma_resv_add_excl_fence(&resv, f);
> +       dma_resv_add_fence(&resv, f, usage);
>         dma_resv_unlock(&resv);
>
>         r = -ENOENT;
> @@ -235,8 +207,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>                         r = -EINVAL;
>                         goto err_iter_end;
>                 }
> -               if (dma_resv_iter_is_exclusive(&cursor) !=
> -                   (usage >= DMA_RESV_USAGE_READ)) {
> +               if (dma_resv_iter_usage(&cursor) != usage) {
>                         pr_err("Unexpected fence usage\n");
>                         r = -EINVAL;
>                         goto err_iter_end;
> @@ -262,18 +233,9 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>         return r;
>  }
>
> -static int test_excl_for_each_unlocked(void *arg)
> -{
> -       return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_for_each_unlocked(void *arg)
> -{
> -       return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
> -}
> -
> -static int test_get_fences(void *arg, enum dma_resv_usage usage)
> +static int test_get_fences(void *arg)
>  {
> +       enum dma_resv_usage usage = (unsigned long)arg;
>         struct dma_fence *f, **fences = NULL;
>         struct dma_resv resv;
>         int r, i;
> @@ -296,10 +258,7 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage)
>                 goto err_resv;
>         }
>
> -       if (usage >= DMA_RESV_USAGE_READ)
> -               dma_resv_add_shared_fence(&resv, f);
> -       else
> -               dma_resv_add_excl_fence(&resv, f);
> +       dma_resv_add_fence(&resv, f, usage);
>         dma_resv_unlock(&resv);
>
>         r = dma_resv_get_fences(&resv, usage, &i, &fences);
> @@ -324,30 +283,24 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage)
>         return r;
>  }
>
> -static int test_excl_get_fences(void *arg)
> -{
> -       return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
> -}
> -
> -static int test_shared_get_fences(void *arg)
> -{
> -       return test_get_fences(arg, DMA_RESV_USAGE_READ);
> -}
> -
>  int dma_resv(void)
>  {
>         static const struct subtest tests[] = {
>                 SUBTEST(sanitycheck),
> -               SUBTEST(test_excl_signaling),
> -               SUBTEST(test_shared_signaling),
> -               SUBTEST(test_excl_for_each),
> -               SUBTEST(test_shared_for_each),
> -               SUBTEST(test_excl_for_each_unlocked),
> -               SUBTEST(test_shared_for_each_unlocked),
> -               SUBTEST(test_excl_get_fences),
> -               SUBTEST(test_shared_get_fences),
> +               SUBTEST(test_signaling),
> +               SUBTEST(test_for_each),
> +               SUBTEST(test_for_each_unlocked),
> +               SUBTEST(test_get_fences),
>         };
> +       enum dma_resv_usage usage;
> +       int r;
>
>         spin_lock_init(&fence_lock);
> -       return subtests(tests, NULL);
> +       for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ;
> +            ++usage) {
> +               r = subtests(tests, (void *)(unsigned long)usage);
> +               if (r)
> +                       return r;
> +       }
> +       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 7de8f67f7dde..ab5d6b630a49 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
>          */
>         replacement = dma_fence_get_stub();
>         dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
> -                               replacement);
> +                               replacement, DMA_RESV_USAGE_READ);
>         dma_fence_put(replacement);
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 2bf909a4242a..1c039db976a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -54,8 +54,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>         bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
>         p->uf_entry.priority = 0;
>         p->uf_entry.tv.bo = &bo->tbo;
> -       /* One for TTM and one for the CS job */
> -       p->uf_entry.tv.num_shared = 2;
> +       /* One for TTM and two for the CS job */
> +       p->uf_entry.tv.num_shared = 3;
>
>         drm_gem_object_put(gobj);
>
> @@ -1284,7 +1284,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>                         break;
>                 }
>                 dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
> -               rcu_assign_pointer(resv->fence_excl, &chain->base);
> +               dma_resv_add_fence(resv, &chain->base, DMA_RESV_USAGE_WRITE);
>                 e->chain = NULL;
>         }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 0a843cc54945..9085a6b1ad56 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1393,10 +1393,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>                 return;
>         }
>
> -       if (shared)
> -               dma_resv_add_shared_fence(resv, fence);
> -       else
> -               dma_resv_add_excl_fence(resv, fence);
> +       dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
> +                          DMA_RESV_USAGE_WRITE);
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index d7cd26dfaf8a..0cc036d93afc 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -203,14 +203,10 @@ static void submit_attach_object_fences(struct etnaviv_gem_submit *submit)
>
>         for (i = 0; i < submit->nr_bos; i++) {
>                 struct drm_gem_object *obj = &submit->bos[i].obj->base;
> +               bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
>
> -               if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
> -                       dma_resv_add_excl_fence(obj->resv,
> -                                                         submit->out_fence);
> -               else
> -                       dma_resv_add_shared_fence(obj->resv,
> -                                                           submit->out_fence);
> -
> +               dma_resv_add_fence(obj->resv, submit->out_fence, write ?
> +                                  DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
>                 submit_unlock_object(submit, i);
>         }
>  }
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> index 14a1c0ad8c3c..e7ae94ee1b44 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> @@ -148,12 +148,13 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>                 if (dma_resv_iter_is_restarted(&cursor))
>                         args->busy = 0;
>
> -               if (dma_resv_iter_is_exclusive(&cursor))
> -                       /* Translate the exclusive fence to the READ *and* WRITE engine */
> -                       args->busy |= busy_check_writer(fence);
> -               else
> -                       /* Translate shared fences to READ set of engines */
> -                       args->busy |= busy_check_reader(fence);
> +               /* Translate read fences to READ set of engines */
> +               args->busy |= busy_check_reader(fence);
> +       }
> +       dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_WRITE);
> +       dma_resv_for_each_fence_unlocked(&cursor, fence) {
> +               /* Translate the write fences to the READ *and* WRITE engine */
> +               args->busy |= busy_check_writer(fence);
>         }
>         dma_resv_iter_end(&cursor);
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> index 8a2223eb0ba9..887cb6b71ae4 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
> @@ -114,7 +114,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>                                                 obj->base.resv, NULL, true,
>                                                 i915_fence_timeout(i915),
>                                                 I915_FENCE_GFP);
> -               dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
> +               dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
> +                                  DMA_RESV_USAGE_WRITE);
>                 dma_fence_work_commit(&clflush->base);
>                 /*
>                  * We must have successfully populated the pages(since we are
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> index 4de6500f3c55..e4a232e22f9d 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> @@ -622,9 +622,8 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
>         if (IS_ERR_OR_NULL(copy_fence))
>                 return PTR_ERR_OR_ZERO(copy_fence);
>
> -       dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
> -       dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
> -
> +       dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
> +       dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ);
>         dma_fence_put(copy_fence);
>
>         return 0;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index fe9f89289418..52fd6705a518 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -1640,7 +1640,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>                 }
>
>                 if (fence) {
> -                       dma_resv_add_excl_fence(vma->obj->base.resv, fence);
> +                       dma_resv_add_fence(vma->obj->base.resv, fence,
> +                                          DMA_RESV_USAGE_WRITE);
>                         obj->write_domain = I915_GEM_DOMAIN_RENDER;
>                         obj->read_domains = 0;
>                 }
> @@ -1652,7 +1653,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
>                 }
>
>                 if (fence) {
> -                       dma_resv_add_shared_fence(vma->obj->base.resv, fence);
> +                       dma_resv_add_fence(vma->obj->base.resv, fence,
> +                                          DMA_RESV_USAGE_READ);
>                         obj->write_domain = 0;
>                 }
>         }
> diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c
> index 9435a3ca71c8..38caa7f78871 100644
> --- a/drivers/gpu/drm/lima/lima_gem.c
> +++ b/drivers/gpu/drm/lima/lima_gem.c
> @@ -366,7 +366,7 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit)
>                 if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
>                         dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence);
>                 else
> -                       dma_resv_add_shared_fence(lima_bo_resv(bos[i]), fence);
> +                       dma_resv_add_fence(lima_bo_resv(bos[i]), fence);
>         }
>
>         drm_gem_unlock_reservations((struct drm_gem_object **)bos,
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 993dbcd7a586..2786913be00a 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -397,7 +397,7 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit)
>                 if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
>                         dma_resv_add_excl_fence(obj->resv, submit->user_fence);
>                 else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
> -                       dma_resv_add_shared_fence(obj->resv, submit->user_fence);
> +                       dma_resv_add_fence(obj->resv, submit->user_fence);
>         }
>  }
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index c6bb4dbcd735..05076e530e7d 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -1308,10 +1308,11 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool excl
>  {
>         struct dma_resv *resv = nvbo->bo.base.resv;
>
> -       if (exclusive)
> -               dma_resv_add_excl_fence(resv, &fence->base);
> -       else if (fence)
> -               dma_resv_add_shared_fence(resv, &fence->base);
> +       if (!fence)
> +               return;
> +
> +       dma_resv_add_fence(resv, &fence->base, exclusive ?
> +                          DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
>  }
>
>  static void
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index d5e81ccee01c..7f01dcf81fab 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -360,9 +360,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
>                 dma_resv_for_each_fence(&cursor, resv,
>                                         dma_resv_usage_rw(exclusive),
>                                         fence) {
> +                       enum dma_resv_usage usage;
>                         struct nouveau_fence *f;
>
> -                       if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
> +                       usage = dma_resv_iter_usage(&cursor);
> +                       if (i == 0 && usage == DMA_RESV_USAGE_WRITE)
>                                 continue;
>
>                         f = nouveau_local_fence(fence, chan->drm);
> diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
> index cde1e8ddaeaa..368d26da0d6a 100644
> --- a/drivers/gpu/drm/qxl/qxl_release.c
> +++ b/drivers/gpu/drm/qxl/qxl_release.c
> @@ -429,7 +429,8 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
>         list_for_each_entry(entry, &release->bos, head) {
>                 bo = entry->bo;
>
> -               dma_resv_add_shared_fence(bo->base.resv, &release->base);
> +               dma_resv_add_fence(bo->base.resv, &release->base,
> +                                  DMA_RESV_USAGE_READ);
>                 ttm_bo_move_to_lru_tail_unlocked(bo);
>                 dma_resv_unlock(bo->base.resv);
>         }
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index afca4bf59a8d..382121c26f81 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -792,8 +792,6 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
>                 return;
>         }
>
> -       if (shared)
> -               dma_resv_add_shared_fence(resv, &fence->base);
> -       else
> -               dma_resv_add_excl_fence(resv, &fence->base);
> +       dma_resv_add_fence(resv, &fence->base, shared ?
> +                          DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
>  }
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index d4b2695606e2..6014c363d6e6 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -762,7 +762,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
>                 return ret;
>         }
>
> -       dma_resv_add_shared_fence(bo->base.resv, fence);
> +       dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
>
>         ret = dma_resv_reserve_fences(bo->base.resv, 1);
>         if (unlikely(ret)) {
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
> index 862d2f22412a..49689c7c8078 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
> @@ -508,7 +508,8 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo,
>         if (ret)
>                 return ret;
>
> -       dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
> +       dma_resv_add_fence(&ghost_obj->base._resv, fence,
> +                          DMA_RESV_USAGE_WRITE);
>
>         /**
>          * If we're not moving to fixed memory, the TTM object
> @@ -562,7 +563,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
>         struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type);
>         int ret = 0;
>
> -       dma_resv_add_excl_fence(bo->base.resv, fence);
> +       dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
>         if (!evict)
>                 ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt);
>         else if (!from->use_tt && pipeline)
> diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> index 789c645f004e..0eb995d25df1 100644
> --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
> @@ -154,10 +154,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
>         list_for_each_entry(entry, list, head) {
>                 struct ttm_buffer_object *bo = entry->bo;
>
> -               if (entry->num_shared)
> -                       dma_resv_add_shared_fence(bo->base.resv, fence);
> -               else
> -                       dma_resv_add_excl_fence(bo->base.resv, fence);
> +               dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ?
> +                                  DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
>                 ttm_bo_move_to_lru_tail_unlocked(bo);
>                 dma_resv_unlock(bo->base.resv);
>         }
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
> index 961812d33827..2352e9640922 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -550,8 +550,8 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
>
>         for (i = 0; i < job->bo_count; i++) {
>                 /* XXX: Use shared fences for read-only objects. */
> -               dma_resv_add_excl_fence(job->bo[i]->resv,
> -                                       job->done_fence);
> +               dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
> +                                  DMA_RESV_USAGE_WRITE);
>         }
>
>         drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 594bd6bb00d2..38550317e025 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -546,7 +546,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
>                 bo = to_vc4_bo(&exec->bo[i]->base);
>                 bo->seqno = seqno;
>
> -               dma_resv_add_shared_fence(bo->base.base.resv, exec->fence);
> +               dma_resv_add_fence(bo->base.base.resv, exec->fence,
> +                                  DMA_RESV_USAGE_READ);
>         }
>
>         list_for_each_entry(bo, &exec->unref_list, unref_head) {
> diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
> index 91fc4940c65a..c2a879734d40 100644
> --- a/drivers/gpu/drm/vgem/vgem_fence.c
> +++ b/drivers/gpu/drm/vgem/vgem_fence.c
> @@ -161,12 +161,9 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>         /* Expose the fence via the dma-buf */
>         dma_resv_lock(resv, NULL);
>         ret = dma_resv_reserve_fences(resv, 1);
> -       if (!ret) {
> -               if (arg->flags & VGEM_FENCE_WRITE)
> -                       dma_resv_add_excl_fence(resv, fence);
> -               else
> -                       dma_resv_add_shared_fence(resv, fence);
> -       }
> +       if (!ret)
> +               dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ?
> +                                  DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
>         dma_resv_unlock(resv);
>
>         /* Record the fence in our idr for later signaling */
> diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
> index 1820ca6cf673..580a78809836 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_gem.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
> @@ -250,7 +250,8 @@ void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs,
>         int i;
>
>         for (i = 0; i < objs->nents; i++)
> -               dma_resv_add_excl_fence(objs->objs[i]->resv, fence);
> +               dma_resv_add_fence(objs->objs[i]->resv, fence,
> +                                  DMA_RESV_USAGE_WRITE);
>  }
>
>  void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs)
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index b96884f7d03d..bec50223efe5 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -758,7 +758,8 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
>
>         ret = dma_resv_reserve_fences(bo->base.resv, 1);
>         if (!ret)
> -               dma_resv_add_excl_fence(bo->base.resv, &fence->base);
> +               dma_resv_add_fence(bo->base.resv, &fence->base,
> +                                  DMA_RESV_USAGE_WRITE);
>         else
>                 /* Last resort fallback when we are OOM */
>                 dma_fence_wait(&fence->base, false);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 74083e62e19d..a8cfc1705d6a 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -393,16 +393,13 @@ struct dma_buf {
>          * e.g. exposed in `Implicit Fence Poll Support`_ must follow the
>          * below rules.
>          *
> -        * - Drivers must add a shared fence through dma_resv_add_shared_fence()
> -        *   for anything the userspace API considers a read access. This highly
> -        *   depends upon the API and window system.
> +        * - Drivers must add a read fence through dma_resv_add_fence() with the
> +        *   DMA_RESV_USAGE_READ flag for anything the userspace API considers a
> +        *   read access. This highly depends upon the API and window system.
>          *
> -        * - Similarly drivers must set the exclusive fence through
> -        *   dma_resv_add_excl_fence() for anything the userspace API considers
> -        *   write access.
> -        *
> -        * - Drivers may just always set the exclusive fence, since that only
> -        *   causes unecessarily synchronization, but no correctness issues.
> +        * - Similarly drivers must add a write fence through
> +        *   dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for
> +        *   anything the userspace API considers write access.
>          *
>          * - Some drivers only expose a synchronous userspace API with no
>          *   pipelining across drivers. These do not set any fences for their
> @@ -413,7 +410,7 @@ struct dma_buf {
>          * Dynamic importers, see dma_buf_attachment_is_dynamic(), have
>          * additional constraints on how they set up fences:
>          *
> -        * - Dynamic importers must obey the exclusive fence and wait for it to
> +        * - Dynamic importers must obey the kernel fences and wait for them to
>          *   signal before allowing access to the buffer's underlying storage
>          *   through the device.
>          *
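
To make the rules above a bit more concrete, a driver's implicit-sync
publish path then ends up looking roughly like the sketch below. This is
illustration only and not part of the patch; my_publish_fence() and
is_write are made-up placeholder names.

static void my_publish_fence(struct drm_gem_object *obj,
                             struct dma_fence *fence, bool is_write)
{
        /* The caller holds obj->resv and has reserved a slot with
         * dma_resv_reserve_fences() beforehand, so adding cannot fail.
         */
        dma_resv_add_fence(obj->resv, fence,
                           is_write ? DMA_RESV_USAGE_WRITE :
                                      DMA_RESV_USAGE_READ);
}
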
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 658674c4b7b9..ae0436d7e7b8 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -95,8 +95,8 @@ static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
>  /**
>   * struct dma_resv - a reservation object manages fences for a buffer
>   *
> - * There are multiple uses for this, with sometimes slightly different rules in
> - * how the fence slots are used.
> + * This is a container for dma_fence objects which needs to handle multiple use
> + * cases.
>   *
>   * One use is to synchronize cross-driver access to a struct dma_buf, either for
>   * dynamic buffer management or just to handle implicit synchronization between
> @@ -126,47 +126,22 @@ struct dma_resv {
>          * @seq:
>          *
>          * Sequence count for managing RCU read-side synchronization, allows
> -        * read-only access to @fence_excl and @fence while ensuring we take a
> -        * consistent snapshot.
> +        * read-only access to @fences while ensuring we take a consistent
> +        * snapshot.
>          */
>         seqcount_ww_mutex_t seq;
>
>         /**
> -        * @fence_excl:
> +        * @fences:
>          *
> -        * The exclusive fence, if there is one currently.
> +        * Array of fences which were added to the dma_resv object
>          *
> -        * To guarantee that no fences are lost, this new fence must signal
> -        * only after the previous exclusive fence has signalled. If
> -        * semantically only a new access is added without actually treating the
> -        * previous one as a dependency the exclusive fences can be strung
> -        * together using struct dma_fence_chain.
> -        *
> -        * Note that actual semantics of what an exclusive or shared fence mean
> -        * is defined by the user, for reservation objects shared across drivers
> -        * see &dma_buf.resv.
> -        */
> -       struct dma_fence __rcu *fence_excl;
> -
> -       /**
> -        * @fence:
> -        *
> -        * List of current shared fences.
> -        *
> -        * There are no ordering constraints of shared fences against the
> -        * exclusive fence slot. If a waiter needs to wait for all access, it
> -        * has to wait for both sets of fences to signal.
> -        *
> -        * A new fence is added by calling dma_resv_add_shared_fence(). Since
> -        * this often needs to be done past the point of no return in command
> +        * A new fence is added by calling dma_resv_add_fence(). Since this
> +        * often needs to be done past the point of no return in command
>          * submission it cannot fail, and therefore sufficient slots need to be
>          * reserved by calling dma_resv_reserve_fences().
> -        *
> -        * Note that actual semantics of what an exclusive or shared fence mean
> -        * is defined by the user, for reservation objects shared across drivers
> -        * see &dma_buf.resv.
>          */
> -       struct dma_resv_list __rcu *fence;
> +       struct dma_resv_list __rcu *fences;
>  };
>
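
As an aside, the reserve-then-add contract described in the new @fences
documentation boils down to the following pattern in a driver (sketch
only, my_attach_write_fence() is a made-up name):

static int my_attach_write_fence(struct drm_gem_object *obj,
                                 struct dma_fence *fence)
{
        int ret;

        dma_resv_lock(obj->resv, NULL);
        ret = dma_resv_reserve_fences(obj->resv, 1);
        if (!ret)
                /* past the point of no return, this cannot fail */
                dma_resv_add_fence(obj->resv, fence, DMA_RESV_USAGE_WRITE);
        dma_resv_unlock(obj->resv);
        return ret;
}
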
>  /**
> @@ -191,6 +166,9 @@ struct dma_resv_iter {
>         /** @fence: the currently handled fence */
>         struct dma_fence *fence;
>
> +       /** @fence_usage: the usage of the current fence */
> +       enum dma_resv_usage fence_usage;
> +
>         /** @seq: sequence number to check for modifications */
>         unsigned int seq;
>
> @@ -200,8 +178,8 @@ struct dma_resv_iter {
>         /** @fences: the shared fences; private, *MUST* not dereference  */
>         struct dma_resv_list *fences;
>
> -       /** @shared_count: number of shared fences */
> -       unsigned int shared_count;
> +       /** @num_fences: number of fences */
> +       unsigned int num_fences;
>
>         /** @is_restarted: true if this is the first returned fence */
>         bool is_restarted;
> @@ -240,14 +218,15 @@ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
>  }
>
>  /**
> - * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
> + * dma_resv_iter_usage - Return the usage of the current fence
>   * @cursor: the cursor of the current position
>   *
> - * Returns true if the currently returned fence is the exclusive one.
> + * Returns the usage of the currently processed fence.
>   */
> -static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
> +static inline enum dma_resv_usage
> +dma_resv_iter_usage(struct dma_resv_iter *cursor)
>  {
> -       return cursor->index == 0;
> +       return cursor->fence_usage;
>  }
>
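
For callers, the conversion from the old helper typically looks roughly
like this (sketch, mirroring the nouveau change earlier in this patch):

        dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
                if (dma_resv_iter_usage(&cursor) == DMA_RESV_USAGE_WRITE)
                        continue;       /* write fence handled elsewhere */
                /* ... handle the remaining read fences ... */
        }
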
>  /**
> @@ -302,9 +281,9 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
>  #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
>
>  #ifdef CONFIG_DEBUG_MUTEXES
> -void dma_resv_reset_shared_max(struct dma_resv *obj);
> +void dma_resv_reset_max_fences(struct dma_resv *obj);
>  #else
> -static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {}
> +static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {}
>  #endif
>
>  /**
> @@ -450,17 +429,18 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj)
>   */
>  static inline void dma_resv_unlock(struct dma_resv *obj)
>  {
> -       dma_resv_reset_shared_max(obj);
> +       dma_resv_reset_max_fences(obj);
>         ww_mutex_unlock(&obj->lock);
>  }
>
>  void dma_resv_init(struct dma_resv *obj);
>  void dma_resv_fini(struct dma_resv *obj);
>  int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences);
> -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
> +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
> +                       enum dma_resv_usage usage);
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
> -                            struct dma_fence *fence);
> -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
> +                            struct dma_fence *fence,
> +                            enum dma_resv_usage usage);
>  int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>                         unsigned int *num_fences, struct dma_fence ***fences);
>  int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Nouveau] [PATCH 06/23] drm/nouveau: stop using dma_resv_excl_fence
  2022-03-21 13:58   ` Christian König
@ 2022-04-03 15:59     ` Christian König
  -1 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-04-03 15:59 UTC (permalink / raw)
  To: Christian König, Ben Skeggs, Karol Herbst, dri-devel,
	Lyude Paul, nouveau

Just a gentle ping to the nouveau guys.

Any more comments on this? Otherwise I'm pushing that with Daniel's rb.

Thanks,
Christian.

On 21.03.22 at 14:58, Christian König wrote:
> Instead use the new dma_resv_get_singleton function.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Ben Skeggs <bskeggs@redhat.com>
> Cc: Karol Herbst <kherbst@redhat.com>
> Cc: Lyude Paul <lyude@redhat.com>
> Cc: nouveau@lists.freedesktop.org
> ---
>   drivers/gpu/drm/nouveau/nouveau_bo.c | 9 ++++++++-
>   1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index fa73fe57f97b..74f8652d2bd3 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -959,7 +959,14 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
>   {
>   	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
>   	struct drm_device *dev = drm->dev;
> -	struct dma_fence *fence = dma_resv_excl_fence(bo->base.resv);
> +	struct dma_fence *fence;
> +	int ret;
> +
> +	/* TODO: This is actually a memory management dependency */
> +	ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
> +	if (ret)
> +		dma_resv_wait_timeout(bo->base.resv, false, false,
> +				      MAX_SCHEDULE_TIMEOUT);
>   
>   	nv10_bo_put_tile_region(dev, *old_tile, fence);
>   	*old_tile = new_tile;


^ permalink raw reply	[flat|nested] 73+ messages in thread


* Re: [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5
  2022-04-02 22:16   ` Bas Nieuwenhuizen
@ 2022-04-03 17:48     ` Christian König
  0 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-04-03 17:48 UTC (permalink / raw)
  To: Bas Nieuwenhuizen; +Cc: Daniel Vetter, Christian König, ML dri-devel

On 03.04.22 at 00:16, Bas Nieuwenhuizen wrote:
> On Mon, Mar 21, 2022 at 2:59 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> [SNIP]
>> @@ -519,17 +513,17 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first);
>>    */
>>   struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor)
>>   {
>> -       unsigned int idx;
>> +       struct dma_fence *fence;
>>
>>          dma_resv_assert_held(cursor->obj);
>>
>>          cursor->is_restarted = false;
>> -       if (!cursor->fences || cursor->index >= cursor->fences->shared_count)
>> +       if (!cursor->fences || cursor->index >= cursor->fences->num_fences)
>>                  return NULL;
>>
>> -       idx = cursor->index++;
>> -       return rcu_dereference_protected(cursor->fences->shared[idx],
>> -                                        dma_resv_held(cursor->obj));
>> +       dma_resv_list_entry(cursor->fences, cursor->index++,
>> +                           cursor->obj, &fence, &cursor->fence_usage);
> Shouldn't we skip the current fence if cursor->fence_usage doesn't
> match cursor->usage? (Similar to what is done in the unlocked
> variant.)

Oh, good point. Totally missed that.
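
Something like the sketch below should do it, i.e. drop fences whose
usage is weaker than what the caller asked for (untested, using the
naming from this patch):

        do {
                if (!cursor->fences ||
                    cursor->index >= cursor->fences->num_fences)
                        return NULL;

                dma_resv_list_entry(cursor->fences, cursor->index++,
                                    cursor->obj, &fence, &cursor->fence_usage);
        } while (cursor->fence_usage > cursor->usage);

        return fence;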

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 16/23] dma-buf: add enum dma_resv_usage v3
  2022-03-21 13:58 ` [PATCH 16/23] dma-buf: add enum dma_resv_usage v3 Christian König
  2022-03-29 15:24   ` Daniel Vetter
@ 2022-04-04  1:13   ` Bas Nieuwenhuizen
  1 sibling, 0 replies; 73+ messages in thread
From: Bas Nieuwenhuizen @ 2022-04-04  1:13 UTC (permalink / raw)
  To: Christian König; +Cc: Daniel Vetter, Christian König, ML dri-devel

On Mon, Mar 21, 2022 at 2:59 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> This change adds the dma_resv_usage enum and allows us to specify why a
> dma_resv object is queried for its containing fences.
>
> In addition to that, a dma_resv_usage_rw() helper function is added to aid
> retrieving the fences for a read or write userspace submission.
>
> This is then deployed to the different query functions of the dma_resv
> object and all of their users. Where the write parameter was previously
> true we now use DMA_RESV_USAGE_READ, and DMA_RESV_USAGE_WRITE otherwise.
>
> v2: add KERNEL/OTHER in separate patch
> v3: some kerneldoc suggestions by Daniel
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-buf.c                     |  3 +-
>  drivers/dma-buf/dma-resv.c                    | 33 +++++----
>  drivers/dma-buf/st-dma-resv.c                 | 48 ++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c       |  5 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c    |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c      |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c       |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  7 +-
>  .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  3 +-
>  drivers/gpu/drm/drm_gem.c                     |  6 +-
>  drivers/gpu/drm/drm_gem_atomic_helper.c       |  2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem.c         |  6 +-
>  drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c  |  7 +-
>  .../gpu/drm/i915/display/intel_atomic_plane.c |  3 +-
>  drivers/gpu/drm/i915/gem/i915_gem_busy.c      |  4 +-
>  drivers/gpu/drm/i915/gem/i915_gem_lmem.c      |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_userptr.c   |  2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_wait.c      |  6 +-
>  .../drm/i915/gem/selftests/i915_gem_dmabuf.c  |  3 +-
>  drivers/gpu/drm/i915/i915_request.c           |  3 +-
>  drivers/gpu/drm/i915/i915_sw_fence.c          |  2 +-
>  drivers/gpu/drm/msm/msm_gem.c                 |  3 +-
>  drivers/gpu/drm/nouveau/dispnv50/wndw.c       |  3 +-
>  drivers/gpu/drm/nouveau/nouveau_bo.c          |  8 +--
>  drivers/gpu/drm/nouveau/nouveau_fence.c       |  8 ++-
>  drivers/gpu/drm/nouveau/nouveau_gem.c         |  3 +-
>  drivers/gpu/drm/panfrost/panfrost_drv.c       |  3 +-
>  drivers/gpu/drm/qxl/qxl_debugfs.c             |  3 +-
>  drivers/gpu/drm/radeon/radeon_display.c       |  3 +-
>  drivers/gpu/drm/radeon/radeon_gem.c           |  9 ++-
>  drivers/gpu/drm/radeon/radeon_mn.c            |  4 +-
>  drivers/gpu/drm/radeon/radeon_sync.c          |  2 +-
>  drivers/gpu/drm/radeon/radeon_uvd.c           |  4 +-
>  drivers/gpu/drm/scheduler/sched_main.c        |  3 +-
>  drivers/gpu/drm/ttm/ttm_bo.c                  | 18 ++---
>  drivers/gpu/drm/vgem/vgem_fence.c             |  4 +-
>  drivers/gpu/drm/virtio/virtgpu_ioctl.c        |  5 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c            |  4 +-
>  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c      |  4 +-
>  drivers/infiniband/core/umem_dmabuf.c         |  3 +-
>  include/linux/dma-resv.h                      | 69 +++++++++++++++----
>  46 files changed, 208 insertions(+), 127 deletions(-)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 602b12d7470d..528983d3ba64 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c

In dma-buf.c I think we also need to update dma_buf_poll_add_cb to use
the usage flags instead of the bool for dma_resv_for_each_fence.
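
The iterator call there would then become something like this (untested):

        dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write),
                                fence) {
                ...
        }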


> @@ -1124,7 +1124,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
>         long ret;
>
>         /* Wait on any implicit rendering fences */
> -       ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT);
> +       ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write),
> +                                   true, MAX_SCHEDULE_TIMEOUT);
>         if (ret < 0)
>                 return ret;
>
> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
> index 78a32da2cb0b..bb7b023c2d33 100644
> --- a/drivers/dma-buf/dma-resv.c
> +++ b/drivers/dma-buf/dma-resv.c
> @@ -384,7 +384,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor)
>         cursor->seq = read_seqcount_begin(&cursor->obj->seq);
>         cursor->index = -1;
>         cursor->shared_count = 0;
> -       if (cursor->all_fences) {
> +       if (cursor->usage >= DMA_RESV_USAGE_READ) {
>                 cursor->fences = dma_resv_shared_list(cursor->obj);
>                 if (cursor->fences)
>                         cursor->shared_count = cursor->fences->shared_count;
> @@ -496,7 +496,7 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor)
>         dma_resv_assert_held(cursor->obj);
>
>         cursor->index = 0;
> -       if (cursor->all_fences)
> +       if (cursor->usage >= DMA_RESV_USAGE_READ)
>                 cursor->fences = dma_resv_shared_list(cursor->obj);
>         else
>                 cursor->fences = NULL;
> @@ -551,7 +551,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
>         list = NULL;
>         excl = NULL;
>
> -       dma_resv_iter_begin(&cursor, src, true);
> +       dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ);
>         dma_resv_for_each_fence_unlocked(&cursor, f) {
>
>                 if (dma_resv_iter_is_restarted(&cursor)) {
> @@ -597,7 +597,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
>   * dma_resv_get_fences - Get an object's shared and exclusive
>   * fences without update side lock held
>   * @obj: the reservation object
> - * @write: true if we should return all fences
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @num_fences: the number of fences returned
>   * @fences: the array of fence ptrs returned (array is krealloc'd to the
>   * required size, and must be freed by caller)
> @@ -605,7 +605,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
>   * Retrieve all fences from the reservation object.
>   * Returns either zero or -ENOMEM.
>   */
> -int dma_resv_get_fences(struct dma_resv *obj, bool write,
> +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>                         unsigned int *num_fences, struct dma_fence ***fences)
>  {
>         struct dma_resv_iter cursor;
> @@ -614,7 +614,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write,
>         *num_fences = 0;
>         *fences = NULL;
>
> -       dma_resv_iter_begin(&cursor, obj, write);
> +       dma_resv_iter_begin(&cursor, obj, usage);
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>
>                 if (dma_resv_iter_is_restarted(&cursor)) {
> @@ -646,7 +646,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences);
>  /**
>   * dma_resv_get_singleton - Get a single fence for all the fences
>   * @obj: the reservation object
> - * @write: true if we should return all fences
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @fence: the resulting fence
>   *
>   * Get a single fence representing all the fences inside the resv object.
> @@ -656,7 +656,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences);
>   * object since that can lead to stack corruption when finalizing the
>   * dma_fence_array.
>   */
> -int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
>                            struct dma_fence **fence)
>  {
>         struct dma_fence_array *array;
> @@ -664,7 +664,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, bool write,
>         unsigned count;
>         int r;
>
> -       r = dma_resv_get_fences(obj, write, &count, &fences);
> +       r = dma_resv_get_fences(obj, usage, &count, &fences);
>          if (r)
>                 return r;
>
> @@ -698,7 +698,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
>   * dma_resv_wait_timeout - Wait on reservation's objects
>   * shared and/or exclusive fences.
>   * @obj: the reservation object
> - * @wait_all: if true, wait on all fences, else wait on just exclusive fence
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   * @intr: if true, do interruptible wait
>   * @timeout: timeout value in jiffies or zero to return immediately
>   *
> @@ -708,14 +708,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
>   * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or
>   * greater than zer on success.
>   */
> -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
> -                          unsigned long timeout)
> +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
> +                          bool intr, unsigned long timeout)
>  {
>         long ret = timeout ? timeout : 1;
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
>
> -       dma_resv_iter_begin(&cursor, obj, wait_all);
> +       dma_resv_iter_begin(&cursor, obj, usage);
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>
>                 ret = dma_fence_wait_timeout(fence, intr, ret);
> @@ -735,8 +735,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
>   * dma_resv_test_signaled - Test if a reservation object's fences have been
>   * signaled.
>   * @obj: the reservation object
> - * @test_all: if true, test all fences, otherwise only test the exclusive
> - * fence
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   *
>   * Callers are not required to hold specific locks, but maybe hold
>   * dma_resv_lock() already.
> @@ -745,12 +744,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
>   *
>   * True if all fences signaled, else false.
>   */
> -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
> +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage)
>  {
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
>
> -       dma_resv_iter_begin(&cursor, obj, test_all);
> +       dma_resv_iter_begin(&cursor, obj, usage);
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                 dma_resv_iter_end(&cursor);
>                 return false;
> diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c
> index d2e61f6ae989..d097981061b1 100644
> --- a/drivers/dma-buf/st-dma-resv.c
> +++ b/drivers/dma-buf/st-dma-resv.c
> @@ -58,7 +58,7 @@ static int sanitycheck(void *arg)
>         return r;
>  }
>
> -static int test_signaling(void *arg, bool shared)
> +static int test_signaling(void *arg, enum dma_resv_usage usage)
>  {
>         struct dma_resv resv;
>         struct dma_fence *f;
> @@ -81,18 +81,18 @@ static int test_signaling(void *arg, bool shared)
>                 goto err_unlock;
>         }
>
> -       if (shared)
> +       if (usage >= DMA_RESV_USAGE_READ)
>                 dma_resv_add_shared_fence(&resv, f);
>         else
>                 dma_resv_add_excl_fence(&resv, f);
>
> -       if (dma_resv_test_signaled(&resv, shared)) {
> +       if (dma_resv_test_signaled(&resv, usage)) {
>                 pr_err("Resv unexpectedly signaled\n");
>                 r = -EINVAL;
>                 goto err_unlock;
>         }
>         dma_fence_signal(f);
> -       if (!dma_resv_test_signaled(&resv, shared)) {
> +       if (!dma_resv_test_signaled(&resv, usage)) {
>                 pr_err("Resv not reporting signaled\n");
>                 r = -EINVAL;
>                 goto err_unlock;
> @@ -107,15 +107,15 @@ static int test_signaling(void *arg, bool shared)
>
>  static int test_excl_signaling(void *arg)
>  {
> -       return test_signaling(arg, false);
> +       return test_signaling(arg, DMA_RESV_USAGE_WRITE);
>  }
>
>  static int test_shared_signaling(void *arg)
>  {
> -       return test_signaling(arg, true);
> +       return test_signaling(arg, DMA_RESV_USAGE_READ);
>  }
>
> -static int test_for_each(void *arg, bool shared)
> +static int test_for_each(void *arg, enum dma_resv_usage usage)
>  {
>         struct dma_resv_iter cursor;
>         struct dma_fence *f, *fence;
> @@ -139,13 +139,13 @@ static int test_for_each(void *arg, bool shared)
>                 goto err_unlock;
>         }
>
> -       if (shared)
> +       if (usage >= DMA_RESV_USAGE_READ)
>                 dma_resv_add_shared_fence(&resv, f);
>         else
>                 dma_resv_add_excl_fence(&resv, f);
>
>         r = -ENOENT;
> -       dma_resv_for_each_fence(&cursor, &resv, shared, fence) {
> +       dma_resv_for_each_fence(&cursor, &resv, usage, fence) {
>                 if (!r) {
>                         pr_err("More than one fence found\n");
>                         r = -EINVAL;
> @@ -156,7 +156,8 @@ static int test_for_each(void *arg, bool shared)
>                         r = -EINVAL;
>                         goto err_unlock;
>                 }
> -               if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
> +               if (dma_resv_iter_is_exclusive(&cursor) !=
> +                   (usage >= DMA_RESV_USAGE_READ)) {
>                         pr_err("Unexpected fence usage\n");
>                         r = -EINVAL;
>                         goto err_unlock;
> @@ -178,15 +179,15 @@ static int test_for_each(void *arg, bool shared)
>
>  static int test_excl_for_each(void *arg)
>  {
> -       return test_for_each(arg, false);
> +       return test_for_each(arg, DMA_RESV_USAGE_WRITE);
>  }
>
>  static int test_shared_for_each(void *arg)
>  {
> -       return test_for_each(arg, true);
> +       return test_for_each(arg, DMA_RESV_USAGE_READ);
>  }
>
> -static int test_for_each_unlocked(void *arg, bool shared)
> +static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage)
>  {
>         struct dma_resv_iter cursor;
>         struct dma_fence *f, *fence;
> @@ -211,14 +212,14 @@ static int test_for_each_unlocked(void *arg, bool shared)
>                 goto err_free;
>         }
>
> -       if (shared)
> +       if (usage >= DMA_RESV_USAGE_READ)
>                 dma_resv_add_shared_fence(&resv, f);
>         else
>                 dma_resv_add_excl_fence(&resv, f);
>         dma_resv_unlock(&resv);
>
>         r = -ENOENT;
> -       dma_resv_iter_begin(&cursor, &resv, shared);
> +       dma_resv_iter_begin(&cursor, &resv, usage);
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                 if (!r) {
>                         pr_err("More than one fence found\n");
> @@ -234,7 +235,8 @@ static int test_for_each_unlocked(void *arg, bool shared)
>                         r = -EINVAL;
>                         goto err_iter_end;
>                 }
> -               if (dma_resv_iter_is_exclusive(&cursor) != !shared) {
> +               if (dma_resv_iter_is_exclusive(&cursor) !=
> +                   (usage >= DMA_RESV_USAGE_READ)) {
>                         pr_err("Unexpected fence usage\n");
>                         r = -EINVAL;
>                         goto err_iter_end;
> @@ -262,15 +264,15 @@ static int test_for_each_unlocked(void *arg, bool shared)
>
>  static int test_excl_for_each_unlocked(void *arg)
>  {
> -       return test_for_each_unlocked(arg, false);
> +       return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
>  }
>
>  static int test_shared_for_each_unlocked(void *arg)
>  {
> -       return test_for_each_unlocked(arg, true);
> +       return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
>  }
>
> -static int test_get_fences(void *arg, bool shared)
> +static int test_get_fences(void *arg, enum dma_resv_usage usage)
>  {
>         struct dma_fence *f, **fences = NULL;
>         struct dma_resv resv;
> @@ -294,13 +296,13 @@ static int test_get_fences(void *arg, bool shared)
>                 goto err_resv;
>         }
>
> -       if (shared)
> +       if (usage >= DMA_RESV_USAGE_READ)
>                 dma_resv_add_shared_fence(&resv, f);
>         else
>                 dma_resv_add_excl_fence(&resv, f);
>         dma_resv_unlock(&resv);
>
> -       r = dma_resv_get_fences(&resv, shared, &i, &fences);
> +       r = dma_resv_get_fences(&resv, usage, &i, &fences);
>         if (r) {
>                 pr_err("get_fences failed\n");
>                 goto err_free;
> @@ -324,12 +326,12 @@ static int test_get_fences(void *arg, bool shared)
>
>  static int test_excl_get_fences(void *arg)
>  {
> -       return test_get_fences(arg, false);
> +       return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
>  }
>
>  static int test_shared_get_fences(void *arg)
>  {
> -       return test_get_fences(arg, true);
> +       return test_get_fences(arg, DMA_RESV_USAGE_READ);
>  }
>
>  int dma_resv(void)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 02488a824fe8..2bf909a4242a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1278,7 +1278,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>                  * submission in a dma_fence_chain and add it as exclusive
>                  * fence.
>                  */
> -               dma_resv_for_each_fence(&cursor, resv, false, fence) {
> +               dma_resv_for_each_fence(&cursor, resv,
> +                                       DMA_RESV_USAGE_WRITE,
> +                                       fence) {
>                         break;
>                 }
>                 dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index e76b96d55551..cefa404d7842 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
>                 goto unpin;
>         }
>
> -       /* TODO: Unify this with other drivers */
> -       r = dma_resv_get_fences(new_abo->tbo.base.resv, true,
> +       r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
>                                 &work->shared_count,
>                                 &work->shared);
>         if (unlikely(r != 0)) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 9b12cab5e606..3a5fe05c7a7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
>                 return -ENOENT;
>         }
>         robj = gem_to_amdgpu_bo(gobj);
> -       ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout);
> +       ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
> +                                   true, timeout);
>
>         /* ret == 0 means not signaled,
>          * ret > 0 means signaled
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 888d97143177..490d2a7a3e2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>         struct dma_fence *fence;
>         int r;
>
> -       r = dma_resv_get_singleton(resv, true, &fence);
> +       r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence);
>         if (r)
>                 goto fallback;
>
> @@ -139,7 +139,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
>         /* Not enough memory for the delayed delete, as last resort
>          * block for all the fences to complete.
>          */
> -       dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT);
> +       dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
> +                             false, MAX_SCHEDULE_TIMEOUT);
>         amdgpu_pasid_free(pasid);
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 4b153daf283d..86f5248676b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
>
>         mmu_interval_set_seq(mni, cur_seq);
>
> -       r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> -                                 MAX_SCHEDULE_TIMEOUT);
> +       r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
> +                                 false, MAX_SCHEDULE_TIMEOUT);
>         mutex_unlock(&adev->notifier_lock);
>         if (r <= 0)
>                 DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 2f808decd8d9..0a843cc54945 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -765,8 +765,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
>                 return 0;
>         }
>
> -       r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
> -                                 MAX_SCHEDULE_TIMEOUT);
> +       r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +                                 false, MAX_SCHEDULE_TIMEOUT);
>         if (r < 0)
>                 return r;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index 40e06745fae9..744e144e5fc2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>         if (resv == NULL)
>                 return -EINVAL;
>
> -       dma_resv_for_each_fence(&cursor, resv, true, f) {
> +       /* TODO: Use DMA_RESV_USAGE_READ here */
> +       dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
>                 dma_fence_chain_for_each(f, f) {
>                         struct dma_fence *tmp = dma_fence_chain_contained(f);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 5859ed0552a4..9ffd8c4c34a0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1356,7 +1356,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
>          * If true, then return false as any KFD process needs all its BOs to
>          * be resident to run successfully
>          */
> -       dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
> +       dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
> +                               DMA_RESV_USAGE_READ, f) {
>                 if (amdkfd_fence_check_mm(f, current->mm))
>                         return false;
>         }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 6f8de11a17f1..33deb0df62fd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -1162,7 +1162,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
>         ib->length_dw = 16;
>
>         if (direct) {
> -               r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> +               r = dma_resv_wait_timeout(bo->tbo.base.resv,
> +                                         DMA_RESV_USAGE_WRITE, false,
>                                           msecs_to_jiffies(10));
>                 if (r == 0)
>                         r = -ETIMEDOUT;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0ab85280e8ed..f3235aad7282 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2105,7 +2105,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
>
> -       dma_resv_for_each_fence(&cursor, resv, true, fence) {
> +       dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) {
>                 /* Add a callback for each fence in the reservation object */
>                 amdgpu_vm_prt_get(adev);
>                 amdgpu_vm_add_prt_cb(adev, fence);
> @@ -2707,7 +2707,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
>                 return true;
>
>         /* Don't evict VM page tables while they are busy */
> -       if (!dma_resv_test_signaled(bo->tbo.base.resv, true))
> +       if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ))
>                 return false;
>
>         /* Try to block ongoing updates */
> @@ -2887,7 +2887,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
>   */
>  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
>  {
> -       timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true,
> +       timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
> +                                       DMA_RESV_USAGE_READ,
>                                         true, timeout);
>         if (timeout <= 0)
>                 return timeout;
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 0eeb394e949c..c9532642559c 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -9199,7 +9199,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>                  * deadlock during GPU reset when this fence will not signal
>                  * but we hold reservation lock for the BO.
>                  */
> -               r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false,
> +               r = dma_resv_wait_timeout(abo->tbo.base.resv,
> +                                         DMA_RESV_USAGE_WRITE, false,
>                                           msecs_to_jiffies(5000));
>                 if (unlikely(r <= 0))
>                         DRM_ERROR("Waiting for fences timed out!");
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index 3c888db59ea4..54079d762051 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -771,7 +771,8 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle,
>                 return -EINVAL;
>         }
>
> -       ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout);
> +       ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all),
> +                                   true, timeout);
>         if (ret == 0)
>                 ret = -ETIME;
>         else if (ret > 0)
> @@ -1345,7 +1346,8 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array,
>         struct dma_fence *fence;
>         int ret = 0;
>
> -       dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
> +       dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
> +                               fence) {
>                 ret = drm_gem_fence_array_add(fence_array, fence);
>                 if (ret)
>                         break;
> diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
> index 9338ddb7edff..a6d89aed0bda 100644
> --- a/drivers/gpu/drm/drm_gem_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
> @@ -151,7 +151,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st
>                 return 0;
>
>         obj = drm_gem_fb_get_obj(state->fb, 0);
> -       ret = dma_resv_get_singleton(obj->resv, false, &fence);
> +       ret = dma_resv_get_singleton(obj->resv, DMA_RESV_USAGE_WRITE, &fence);
>         if (ret)
>                 return ret;
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> index d5314aa28ff7..507172e2780b 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
> @@ -380,12 +380,14 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
>         }
>
>         if (op & ETNA_PREP_NOSYNC) {
> -               if (!dma_resv_test_signaled(obj->resv, write))
> +               if (!dma_resv_test_signaled(obj->resv,
> +                                           dma_resv_usage_rw(write)))
>                         return -EBUSY;
>         } else {
>                 unsigned long remain = etnaviv_timeout_to_jiffies(timeout);
>
> -               ret = dma_resv_wait_timeout(obj->resv, write, true, remain);
> +               ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
> +                                           true, remain);
>                 if (ret <= 0)
>                         return ret == 0 ? -ETIMEDOUT : ret;
>         }
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> index b808ddb9da48..d7cd26dfaf8a 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
> @@ -178,6 +178,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>         for (i = 0; i < submit->nr_bos; i++) {
>                 struct etnaviv_gem_submit_bo *bo = &submit->bos[i];
>                 struct dma_resv *robj = bo->obj->base.resv;
> +               enum dma_resv_usage usage;
>
>                 ret = dma_resv_reserve_fences(robj, 1);
>                 if (ret)
> @@ -186,9 +187,9 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit)
>                 if (submit->flags & ETNA_SUBMIT_NO_IMPLICIT)
>                         continue;
>
> -               ret = dma_resv_get_fences(robj,
> -                                         bo->flags & ETNA_SUBMIT_BO_WRITE,
> -                                         &bo->nr_shared, &bo->shared);
> +               usage = dma_resv_usage_rw(bo->flags & ETNA_SUBMIT_BO_WRITE);
> +               ret = dma_resv_get_fences(robj, usage, &bo->nr_shared,
> +                                         &bo->shared);
>                 if (ret)
>                         return ret;
>         }
> diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
> index c2c512cd8ec0..2c3bb8aecd07 100644
> --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
> +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
> @@ -799,7 +799,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane,
>                 if (ret < 0)
>                         goto unpin_fb;
>
> -               dma_resv_iter_begin(&cursor, obj->base.resv, false);
> +               dma_resv_iter_begin(&cursor, obj->base.resv,
> +                                   DMA_RESV_USAGE_WRITE);
>                 dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                         add_rps_boost_after_vblank(new_plane_state->hw.crtc,
>                                                    fence);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> index 470fdfd61a0f..14a1c0ad8c3c 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
> @@ -138,12 +138,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>          * Alternatively, we can trade that extra information on read/write
>          * activity with
>          *      args->busy =
> -        *              !dma_resv_test_signaled(obj->resv, true);
> +        *              !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
>          * to report the overall busyness. This is what the wait-ioctl does.
>          *
>          */
>         args->busy = 0;
> -       dma_resv_iter_begin(&cursor, obj->base.resv, true);
> +       dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ);
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                 if (dma_resv_iter_is_restarted(&cursor))
>                         args->busy = 0;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> index 444f8268b9c5..a200d3e66573 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
> @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
>         struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
>
>  #ifdef CONFIG_LOCKDEP
> -       GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) &&
> +       GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) &&
>                     i915_gem_object_evictable(obj));
>  #endif
>         return mr && (mr->type == INTEL_MEMORY_LOCAL ||
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> index 3cc01c30dd62..60feff9160de 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
> @@ -85,7 +85,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
>                 return true;
>
>         /* we will unbind on next submission, still have userptr pins */
> -       r = dma_resv_wait_timeout(obj->base.resv, true, false,
> +       r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false,
>                                   MAX_SCHEDULE_TIMEOUT);
>         if (r <= 0)
>                 drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> index dab3d30c09a0..319936f91ac5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
> @@ -40,7 +40,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
>         struct dma_fence *fence;
>         long ret = timeout ?: 1;
>
> -       dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
> +       dma_resv_iter_begin(&cursor, resv,
> +                           dma_resv_usage_rw(flags & I915_WAIT_ALL));
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                 ret = i915_gem_object_wait_fence(fence, flags, timeout);
>                 if (ret <= 0)
> @@ -117,7 +118,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
>
> -       dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL);
> +       dma_resv_iter_begin(&cursor, obj->base.resv,
> +                           dma_resv_usage_rw(flags & I915_WAIT_ALL));
>         dma_resv_for_each_fence_unlocked(&cursor, fence)
>                 i915_gem_fence_wait_priority(fence, attr);
>         dma_resv_iter_end(&cursor);
> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> index 3cc74b0fed06..342df658e0fc 100644
> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
> @@ -219,7 +219,8 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
>                 goto out_detach;
>         }
>
> -       timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ);
> +       timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE,
> +                                       true, 5 * HZ);
>         if (!timeout) {
>                 pr_err("dmabuf wait for exclusive fence timed out.\n");
>                 timeout = -ETIME;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 76cf5ac91e94..17d7216ce221 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1594,7 +1594,8 @@ i915_request_await_object(struct i915_request *to,
>         struct dma_fence *fence;
>         int ret = 0;
>
> -       dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) {
> +       dma_resv_for_each_fence(&cursor, obj->base.resv,
> +                               dma_resv_usage_rw(write), fence) {
>                 ret = i915_request_await_dma_fence(to, fence);
>                 if (ret)
>                         break;
> diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
> index 2a74a9a1cafe..ae984c66c48a 100644
> --- a/drivers/gpu/drm/i915/i915_sw_fence.c
> +++ b/drivers/gpu/drm/i915/i915_sw_fence.c
> @@ -585,7 +585,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
>         debug_fence_assert(fence);
>         might_sleep_if(gfpflags_allow_blocking(gfp));
>
> -       dma_resv_iter_begin(&cursor, resv, write);
> +       dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write));
>         dma_resv_for_each_fence_unlocked(&cursor, f) {
>                 pending = i915_sw_fence_await_dma_fence(fence, f, timeout,
>                                                         gfp);
> diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> index 02b9ae65a96a..01bbb5f2d462 100644
> --- a/drivers/gpu/drm/msm/msm_gem.c
> +++ b/drivers/gpu/drm/msm/msm_gem.c
> @@ -848,7 +848,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout)
>                 op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout);
>         long ret;
>
> -       ret = dma_resv_wait_timeout(obj->resv, write, true,  remain);
> +       ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write),
> +                                   true,  remain);
>         if (ret == 0)
>                 return remain == 0 ? -EBUSY : -ETIMEDOUT;
>         else if (ret < 0)
> diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> index e2faf92e4831..8642b84ea20c 100644
> --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
> @@ -558,7 +558,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
>                         asyw->image.handle[0] = ctxdma->object.handle;
>         }
>
> -       ret = dma_resv_get_singleton(nvbo->bo.base.resv, false,
> +       ret = dma_resv_get_singleton(nvbo->bo.base.resv,
> +                                    DMA_RESV_USAGE_WRITE,
>                                      &asyw->state.fence);
>         if (ret)
>                 return ret;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 74f8652d2bd3..c6bb4dbcd735 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -962,11 +962,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
>         struct dma_fence *fence;
>         int ret;
>
> -       /* TODO: This is actually a memory management dependency */
> -       ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
> +       ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE,
> +                                    &fence);
>         if (ret)
> -               dma_resv_wait_timeout(bo->base.resv, false, false,
> -                                     MAX_SCHEDULE_TIMEOUT);
> +               dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE,
> +                                     false, MAX_SCHEDULE_TIMEOUT);
>
>         nv10_bo_put_tile_region(dev, *old_tile, fence);
>         *old_tile = new_tile;
> diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
> index 0268259e97eb..d5e81ccee01c 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_fence.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
> @@ -350,14 +350,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
>         if (ret)
>                 return ret;
>
> -       /* Waiting for the exclusive fence first causes performance regressions
> -        * under some circumstances. So manually wait for the shared ones first.
> +       /* Waiting for the writes first causes performance regressions
> +        * under some circumstances. So manually wait for the reads first.
>          */
>         for (i = 0; i < 2; ++i) {
>                 struct dma_resv_iter cursor;
>                 struct dma_fence *fence;
>
> -               dma_resv_for_each_fence(&cursor, resv, exclusive, fence) {
> +               dma_resv_for_each_fence(&cursor, resv,
> +                                       dma_resv_usage_rw(exclusive),
> +                                       fence) {
>                         struct nouveau_fence *f;
>
>                         if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index 9416bee92141..fab542a758ff 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -962,7 +962,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>                 return -ENOENT;
>         nvbo = nouveau_gem_object(gem);
>
> -       lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true,
> +       lret = dma_resv_wait_timeout(nvbo->bo.base.resv,
> +                                    dma_resv_usage_rw(write), true,
>                                      no_wait ? 0 : 30 * HZ);
>         if (!lret)
>                 ret = -EBUSY;
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index 94b6f0a19c83..7fcbc2a5b6cd 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -316,7 +316,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data,
>         if (!gem_obj)
>                 return -ENOENT;
>
> -       ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout);
> +       ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ,
> +                                   true, timeout);
>         if (!ret)
>                 ret = timeout ? -ETIMEDOUT : -EBUSY;
>
> diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
> index 6a36b0fd845c..33e5889d6608 100644
> --- a/drivers/gpu/drm/qxl/qxl_debugfs.c
> +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
> @@ -61,7 +61,8 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
>                 struct dma_fence *fence;
>                 int rel = 0;
>
> -               dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true);
> +               dma_resv_iter_begin(&cursor, bo->tbo.base.resv,
> +                                   DMA_RESV_USAGE_READ);
>                 dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                         if (dma_resv_iter_is_restarted(&cursor))
>                                 rel = 0;
> diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
> index f60e826cd292..57ff2b723c87 100644
> --- a/drivers/gpu/drm/radeon/radeon_display.c
> +++ b/drivers/gpu/drm/radeon/radeon_display.c
> @@ -533,7 +533,8 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc,
>                 DRM_ERROR("failed to pin new rbo buffer before flip\n");
>                 goto cleanup;
>         }
> -       r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence);
> +       r = dma_resv_get_singleton(new_rbo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +                                  &work->fence);
>         if (r) {
>                 radeon_bo_unreserve(new_rbo);
>                 DRM_ERROR("failed to get new rbo buffer fences\n");
> diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
> index a36a4f2c76b0..71bf9299e45c 100644
> --- a/drivers/gpu/drm/radeon/radeon_gem.c
> +++ b/drivers/gpu/drm/radeon/radeon_gem.c
> @@ -161,7 +161,9 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
>         }
>         if (domain == RADEON_GEM_DOMAIN_CPU) {
>                 /* Asking for cpu access wait for object idle */
> -               r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ);
> +               r = dma_resv_wait_timeout(robj->tbo.base.resv,
> +                                         DMA_RESV_USAGE_READ,
> +                                         true, 30 * HZ);
>                 if (!r)
>                         r = -EBUSY;
>
> @@ -523,7 +525,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
>         }
>         robj = gem_to_radeon_bo(gobj);
>
> -       r = dma_resv_test_signaled(robj->tbo.base.resv, true);
> +       r = dma_resv_test_signaled(robj->tbo.base.resv, DMA_RESV_USAGE_READ);
>         if (r == 0)
>                 r = -EBUSY;
>         else
> @@ -552,7 +554,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
>         }
>         robj = gem_to_radeon_bo(gobj);
>
> -       ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ);
> +       ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
> +                                   true, 30 * HZ);
>         if (ret == 0)
>                 r = -EBUSY;
>         else if (ret < 0)
> diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
> index 9fa88549c89e..68ebeb1bdfff 100644
> --- a/drivers/gpu/drm/radeon/radeon_mn.c
> +++ b/drivers/gpu/drm/radeon/radeon_mn.c
> @@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
>                 return true;
>         }
>
> -       r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
> -                                 MAX_SCHEDULE_TIMEOUT);
> +       r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ,
> +                                 false, MAX_SCHEDULE_TIMEOUT);
>         if (r <= 0)
>                 DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>
> diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c
> index b991ba1bcd51..49bbb2266c0f 100644
> --- a/drivers/gpu/drm/radeon/radeon_sync.c
> +++ b/drivers/gpu/drm/radeon/radeon_sync.c
> @@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev,
>         struct dma_fence *f;
>         int r = 0;
>
> -       dma_resv_for_each_fence(&cursor, resv, shared, f) {
> +       dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) {
>                 fence = to_radeon_fence(f);
>                 if (fence && fence->rdev == rdev)
>                         radeon_sync_fence(sync, fence);
> diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
> index 377f9cdb5b53..4000ad2f39ba 100644
> --- a/drivers/gpu/drm/radeon/radeon_uvd.c
> +++ b/drivers/gpu/drm/radeon/radeon_uvd.c
> @@ -478,8 +478,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
>                 return -EINVAL;
>         }
>
> -       r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
> -                                 MAX_SCHEDULE_TIMEOUT);
> +       r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
> +                                 false, MAX_SCHEDULE_TIMEOUT);
>         if (r <= 0) {
>                 DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
>                 return r ? r : -ETIME;
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index b81fceb0b8a2..0a1377dac58d 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -703,7 +703,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
>         struct dma_fence *fence;
>         int ret;
>
> -       dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
> +       dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
> +                               fence) {
>                 /* Make sure to grab an additional ref on the added fence */
>                 dma_fence_get(fence);
>                 ret = drm_sched_job_add_dependency(job, fence);
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 1dd6f13bb03c..d4b2695606e2 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -272,7 +272,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
>         struct dma_resv_iter cursor;
>         struct dma_fence *fence;
>
> -       dma_resv_iter_begin(&cursor, resv, true);
> +       dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ);
>         dma_resv_for_each_fence_unlocked(&cursor, fence) {
>                 if (!fence->ops->signaled)
>                         dma_fence_enable_sw_signaling(fence);
> @@ -301,7 +301,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>         struct dma_resv *resv = &bo->base._resv;
>         int ret;
>
> -       if (dma_resv_test_signaled(resv, true))
> +       if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ))
>                 ret = 0;
>         else
>                 ret = -EBUSY;
> @@ -313,7 +313,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
>                         dma_resv_unlock(bo->base.resv);
>                 spin_unlock(&bo->bdev->lru_lock);
>
> -               lret = dma_resv_wait_timeout(resv, true, interruptible,
> +               lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ,
> +                                            interruptible,
>                                              30 * HZ);
>
>                 if (lret < 0)
> @@ -416,7 +417,8 @@ static void ttm_bo_release(struct kref *kref)
>                         /* Last resort, if we fail to allocate memory for the
>                          * fences block for the BO to become idle
>                          */
> -                       dma_resv_wait_timeout(bo->base.resv, true, false,
> +                       dma_resv_wait_timeout(bo->base.resv,
> +                                             DMA_RESV_USAGE_READ, false,
>                                               30 * HZ);
>                 }
>
> @@ -427,7 +429,7 @@ static void ttm_bo_release(struct kref *kref)
>                 ttm_mem_io_free(bdev, bo->resource);
>         }
>
> -       if (!dma_resv_test_signaled(bo->base.resv, true) ||
> +       if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) ||
>             !dma_resv_trylock(bo->base.resv)) {
>                 /* The BO is not idle, resurrect it for delayed destroy */
>                 ttm_bo_flush_all_fences(bo);
> @@ -1072,14 +1074,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
>         long timeout = 15 * HZ;
>
>         if (no_wait) {
> -               if (dma_resv_test_signaled(bo->base.resv, true))
> +               if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ))
>                         return 0;
>                 else
>                         return -EBUSY;
>         }
>
> -       timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible,
> -                                       timeout);
> +       timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
> +                                       interruptible, timeout);
>         if (timeout < 0)
>                 return timeout;
>
> diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
> index 2ddbebca87d9..91fc4940c65a 100644
> --- a/drivers/gpu/drm/vgem/vgem_fence.c
> +++ b/drivers/gpu/drm/vgem/vgem_fence.c
> @@ -130,6 +130,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>         struct vgem_file *vfile = file->driver_priv;
>         struct dma_resv *resv;
>         struct drm_gem_object *obj;
> +       enum dma_resv_usage usage;
>         struct dma_fence *fence;
>         int ret;
>
> @@ -151,7 +152,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
>
>         /* Check for a conflicting fence */
>         resv = obj->resv;
> -       if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) {
> +       usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE);
> +       if (!dma_resv_test_signaled(resv, usage)) {
>                 ret = -EBUSY;
>                 goto err_fence;
>         }
> diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> index c708bab555c6..5577cc7408b2 100644
> --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c
> @@ -518,9 +518,10 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data,
>                 return -ENOENT;
>
>         if (args->flags & VIRTGPU_WAIT_NOWAIT) {
> -               ret = dma_resv_test_signaled(obj->resv, true);
> +               ret = dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
>         } else {
> -               ret = dma_resv_wait_timeout(obj->resv, true, true, timeout);
> +               ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ,
> +                                           true, timeout);
>         }
>         if (ret == 0)
>                 ret = -EBUSY;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> index fe13aa8b4a64..b96884f7d03d 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
> @@ -528,8 +528,8 @@ static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo,
>         if (flags & drm_vmw_synccpu_allow_cs) {
>                 long lret;
>
> -               lret = dma_resv_wait_timeout(bo->base.resv, true, true,
> -                                            nonblock ? 0 :
> +               lret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ,
> +                                            true, nonblock ? 0 :
>                                              MAX_SCHEDULE_TIMEOUT);
>                 if (!lret)
>                         return -EBUSY;
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> index 36c3b5db7e69..39081dbf9ac8 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> @@ -1166,8 +1166,8 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
>                 if (bo->moving)
>                         dma_fence_put(bo->moving);
>
> -               /* TODO: This is actually a memory management dependency */
> -               return dma_resv_get_singleton(bo->base.resv, false,
> +               return dma_resv_get_singleton(bo->base.resv,
> +                                             DMA_RESV_USAGE_WRITE,
>                                               &bo->moving);
>         }
>
> diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c
> index d32cd7538835..f9901d273b8e 100644
> --- a/drivers/infiniband/core/umem_dmabuf.c
> +++ b/drivers/infiniband/core/umem_dmabuf.c
> @@ -67,7 +67,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf)
>          * may be not up-to-date. Wait for the exporter to finish
>          * the migration.
>          */
> -       return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false,
> +       return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv,
> +                                    DMA_RESV_USAGE_WRITE,
>                                      false, MAX_SCHEDULE_TIMEOUT);
>  }
>  EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
> index 5fa04d0fccad..658674c4b7b9 100644
> --- a/include/linux/dma-resv.h
> +++ b/include/linux/dma-resv.h
> @@ -49,6 +49,49 @@ extern struct ww_class reservation_ww_class;
>
>  struct dma_resv_list;
>
> +/**
> + * enum dma_resv_usage - how the fences from a dma_resv obj are used
> + *
> + * This enum describes the different use cases for a dma_resv object and
> + * controls which fences are returned when queried.
> + */
> +enum dma_resv_usage {
> +       /**
> +        * @DMA_RESV_USAGE_WRITE: Implicit write synchronization.
> +        *
> +        * This should only be used for userspace command submissions which add
> +        * an implicit write dependency.
> +        */
> +       DMA_RESV_USAGE_WRITE,
> +
> +       /**
> +        * @DMA_RESV_USAGE_READ: Implicit read synchronization.
> +        *
> +        * This should only be used for userspace command submissions which add
> +        * an implicit read dependency.
> +        */
> +       DMA_RESV_USAGE_READ,
> +};
> +
> +/**
> + * dma_resv_usage_rw - helper for implicit sync
> + * @write: true if we create a new implicit sync write
> + *
> + * This returns the implicit synchronization usage for write or read accesses,
> + * see enum dma_resv_usage.
> + */
> +static inline enum dma_resv_usage dma_resv_usage_rw(bool write)
> +{
> +       /* This looks confusing at first sight, but is indeed correct.
> +        *
> +        * The rational is that new write operations needs to wait for the
> +        * existing read and write operations to finish.
> +        * But a new read operation only needs to wait for the existing write
> +        * operations to finish.
> +        */
> +       return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE;
> +}
> +
>  /**
>   * struct dma_resv - a reservation object manages fences for a buffer
>   *
> @@ -142,8 +185,8 @@ struct dma_resv_iter {
>         /** @obj: The dma_resv object we iterate over */
>         struct dma_resv *obj;
>
> -       /** @all_fences: If all fences should be returned */
> -       bool all_fences;
> +       /** @usage: Controls which fences are returned */
> +       enum dma_resv_usage usage;
>
>         /** @fence: the currently handled fence */
>         struct dma_fence *fence;
> @@ -173,14 +216,14 @@ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor);
>   * dma_resv_iter_begin - initialize a dma_resv_iter object
>   * @cursor: The dma_resv_iter object to initialize
>   * @obj: The dma_resv object which we want to iterate over
> - * @all_fences: If all fences should be returned or just the exclusive one
> + * @usage: controls which fences to include, see enum dma_resv_usage.
>   */
>  static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
>                                        struct dma_resv *obj,
> -                                      bool all_fences)
> +                                      enum dma_resv_usage usage)
>  {
>         cursor->obj = obj;
> -       cursor->all_fences = all_fences;
> +       cursor->usage = usage;
>         cursor->fence = NULL;
>  }
>
> @@ -241,7 +284,7 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
>   * dma_resv_for_each_fence - fence iterator
>   * @cursor: a struct dma_resv_iter pointer
>   * @obj: a dma_resv object pointer
> - * @all_fences: true if all fences should be returned
> + * @usage: controls which fences to return
>   * @fence: the current fence
>   *
>   * Iterate over the fences in a struct dma_resv object while holding the
> @@ -250,8 +293,8 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor)
>   * valid as long as the lock is held and so no extra reference to the fence is
>   * taken.
>   */
> -#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)        \
> -       for (dma_resv_iter_begin(cursor, obj, all_fences),      \
> +#define dma_resv_for_each_fence(cursor, obj, usage, fence)     \
> +       for (dma_resv_iter_begin(cursor, obj, usage),   \
>              fence = dma_resv_iter_first(cursor); fence;        \
>              fence = dma_resv_iter_next(cursor))
>
> @@ -418,14 +461,14 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence);
>  void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
>                              struct dma_fence *fence);
>  void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
> -int dma_resv_get_fences(struct dma_resv *obj, bool write,
> +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
>                         unsigned int *num_fences, struct dma_fence ***fences);
> -int dma_resv_get_singleton(struct dma_resv *obj, bool write,
> +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
>                            struct dma_fence **fence);
>  int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
> -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
> -                          unsigned long timeout);
> -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all);
> +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage,
> +                          bool intr, unsigned long timeout);
> +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage);
>  void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
>
>  #endif /* _LINUX_RESERVATION_H */
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 73+ messages in thread
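
A minimal sketch of how the new usage parameter is meant to be consumed on
the submission side, mirroring the scheduler hunk above; the function name
and parameters are made up for illustration and are not part of the patch:

#include <linux/dma-resv.h>
#include <drm/drm_gem.h>
#include <drm/gpu_scheduler.h>

/*
 * Sketch only: gather the implicit-sync dependencies of a new job that
 * touches @obj. A new write has to wait for all existing readers and
 * writers, a new read only for existing writers; dma_resv_usage_rw()
 * encodes exactly that mapping. The reservation lock must be held while
 * using dma_resv_for_each_fence().
 */
static int example_add_implicit_deps(struct drm_sched_job *job,
				     struct drm_gem_object *obj, bool write)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret = 0;

	dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
				fence) {
		/* Grab an additional reference for the scheduler. */
		dma_fence_get(fence);
		ret = drm_sched_job_add_dependency(job, fence);
		if (ret)
			break;
	}
	return ret;
}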

* Re: [Nouveau] [PATCH 06/23] drm/nouveau: stop using dma_resv_excl_fence
  2022-04-03 15:59     ` Christian König
@ 2022-04-04 11:47       ` Karol Herbst
  -1 siblings, 0 replies; 73+ messages in thread
From: Karol Herbst @ 2022-04-04 11:47 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel, Ben Skeggs, Christian König, nouveau

On Sun, Apr 3, 2022 at 5:59 PM Christian König <christian.koenig@amd.com> wrote:
>
> Just a gentle ping to the nouveau guys.
>
> Any more comments on this? Otherwise I'm pushing that with Daniels rb.
>

It looks fine, but given that this area broke in the past I will try
to do some testing either before or after you push it. As long as we
do so before 5.19 it should be okay I think.

Unless somebody knowing more about this code has anything else to say.

> Thanks,
> Christian.
>
> Am 21.03.22 um 14:58 schrieb Christian König:
> > Instead use the new dma_resv_get_singleton function.
> >
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Ben Skeggs <bskeggs@redhat.com>
> > Cc: Karol Herbst <kherbst@redhat.com>
> > Cc: Lyude Paul <lyude@redhat.com>
> > Cc: nouveau@lists.freedesktop.org
> > ---
> >   drivers/gpu/drm/nouveau/nouveau_bo.c | 9 ++++++++-
> >   1 file changed, 8 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > index fa73fe57f97b..74f8652d2bd3 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > @@ -959,7 +959,14 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
> >   {
> >       struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
> >       struct drm_device *dev = drm->dev;
> > -     struct dma_fence *fence = dma_resv_excl_fence(bo->base.resv);
> > +     struct dma_fence *fence;
> > +     int ret;
> > +
> > +     /* TODO: This is actually a memory management dependency */
> > +     ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
> > +     if (ret)
> > +             dma_resv_wait_timeout(bo->base.resv, false, false,
> > +                                   MAX_SCHEDULE_TIMEOUT);
> >
> >       nv10_bo_put_tile_region(dev, *old_tile, fence);
> >       *old_tile = new_tile;
>


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [Nouveau] [PATCH 06/23] drm/nouveau: stop using dma_resv_excl_fence
  2022-04-04 11:47       ` Karol Herbst
@ 2022-04-05  7:25         ` Christian König
  -1 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-04-05  7:25 UTC (permalink / raw)
  To: Karol Herbst, Christian König; +Cc: nouveau, Ben Skeggs, dri-devel

Am 04.04.22 um 13:47 schrieb Karol Herbst:
> On Sun, Apr 3, 2022 at 5:59 PM Christian König <christian.koenig@amd.com> wrote:
>> Just a gentle ping to the nouveau guys.
>>
>> Any more comments on this? Otherwise I'm pushing that with Daniels rb.
>>
> It looks fine, but given that this area broke in the past I will try
> to do some testing either before or after you push it. As long as we
> do so before 5.19 it should be okay I think.

Ok, that sounds good enough to me. Going to push it to drm-misc-next now.

Thanks,
Christian.

>
> Unless somebody knowing more about this code has anything else to say.
>
>> Thanks,
>> Christian.
>>
>> Am 21.03.22 um 14:58 schrieb Christian König:
>>> Instead use the new dma_resv_get_singleton function.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> Cc: Ben Skeggs <bskeggs@redhat.com>
>>> Cc: Karol Herbst <kherbst@redhat.com>
>>> Cc: Lyude Paul <lyude@redhat.com>
>>> Cc: nouveau@lists.freedesktop.org
>>> ---
>>>    drivers/gpu/drm/nouveau/nouveau_bo.c | 9 ++++++++-
>>>    1 file changed, 8 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>> index fa73fe57f97b..74f8652d2bd3 100644
>>> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>>> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>>> @@ -959,7 +959,14 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
>>>    {
>>>        struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
>>>        struct drm_device *dev = drm->dev;
>>> -     struct dma_fence *fence = dma_resv_excl_fence(bo->base.resv);
>>> +     struct dma_fence *fence;
>>> +     int ret;
>>> +
>>> +     /* TODO: This is actually a memory management dependency */
>>> +     ret = dma_resv_get_singleton(bo->base.resv, false, &fence);
>>> +     if (ret)
>>> +             dma_resv_wait_timeout(bo->base.resv, false, false,
>>> +                                   MAX_SCHEDULE_TIMEOUT);
>>>
>>>        nv10_bo_put_tile_region(dev, *old_tile, fence);
>>>        *old_tile = new_tile;


^ permalink raw reply	[flat|nested] 73+ messages in thread

* Re: [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2
  2022-03-23 13:36   ` Daniel Vetter
@ 2022-04-05  7:58     ` Christian König
  0 siblings, 0 replies; 73+ messages in thread
From: Christian König @ 2022-04-05  7:58 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: daniel.vetter, Christian König, dri-devel

Am 23.03.22 um 14:36 schrieb Daniel Vetter:
> On Mon, Mar 21, 2022 at 02:58:43PM +0100, Christian König wrote:
>> Drivers should never touch this directly.
>>
>> v2: fix rebase clash
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> I guess as soon as we have the rdma ack you can land up to this patch?

It also needed the nouveau and vmwgfx acks, but I just pushed it. Finally :)

Christian.

> -Daniel
>
>> ---
>>   drivers/dma-buf/dma-resv.c |  6 ++++++
>>   include/linux/dma-resv.h   | 17 -----------------
>>   2 files changed, 6 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
>> index c09fd8da0c85..1c9af97fe904 100644
>> --- a/drivers/dma-buf/dma-resv.c
>> +++ b/drivers/dma-buf/dma-resv.c
>> @@ -140,6 +140,12 @@ void dma_resv_fini(struct dma_resv *obj)
>>   }
>>   EXPORT_SYMBOL(dma_resv_fini);
>>   
>> +static inline struct dma_fence *
>> +dma_resv_excl_fence(struct dma_resv *obj)
>> +{
>> +       return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
>> +}
>> +
>>   static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj)
>>   {
>>   	return rcu_dereference_check(obj->fence, dma_resv_held(obj));
>> diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
>> index 08512c1e215d..20e13f36710a 100644
>> --- a/include/linux/dma-resv.h
>> +++ b/include/linux/dma-resv.h
>> @@ -423,23 +423,6 @@ static inline void dma_resv_unlock(struct dma_resv *obj)
>>   	ww_mutex_unlock(&obj->lock);
>>   }
>>   
>> -/**
>> - * dma_resv_excl_fence - return the object's exclusive fence
>> - * @obj: the reservation object
>> - *
>> - * Returns the exclusive fence (if any). Caller must either hold the objects
>> - * through dma_resv_lock() or the RCU read side lock through rcu_read_lock(),
>> - * or one of the variants of each
>> - *
>> - * RETURNS
>> - * The exclusive fence or NULL
>> - */
>> -static inline struct dma_fence *
>> -dma_resv_excl_fence(struct dma_resv *obj)
>> -{
>> -	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
>> -}
>> -
>>   void dma_resv_init(struct dma_resv *obj);
>>   void dma_resv_fini(struct dma_resv *obj);
>>   int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences);
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 73+ messages in thread
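
A minimal sketch of what the replacement looks like for a driver that used
to peek at the exclusive fence directly; this follows the
nouveau_bo_vm_cleanup() pattern from patch 06 and the enum-based
dma_resv_get_singleton() signature from the header hunk above, with a
made-up function name:

#include <linux/dma-resv.h>
#include <linux/sched.h>

/*
 * Sketch only: instead of dereferencing obj->fence_excl, ask dma-resv for
 * one fence combining all writers. On allocation failure just block until
 * the object is idle. *fence comes back NULL when there is nothing to
 * wait for, and the caller owns the returned reference.
 */
static struct dma_fence *example_get_write_fence(struct dma_resv *resv)
{
	struct dma_fence *fence = NULL;

	if (dma_resv_get_singleton(resv, DMA_RESV_USAGE_WRITE, &fence))
		dma_resv_wait_timeout(resv, DMA_RESV_USAGE_WRITE, false,
				      MAX_SCHEDULE_TIMEOUT);

	return fence;
}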

* Re: [PATCH 23/23] drm/ttm: remove bo->moving
  2022-03-29 16:02   ` Daniel Vetter
@ 2022-04-05  9:25     ` Christian König
  2022-04-05 12:51       ` Daniel Vetter
  0 siblings, 1 reply; 73+ messages in thread
From: Christian König @ 2022-04-05  9:25 UTC (permalink / raw)
  To: Daniel Vetter, Christian König; +Cc: daniel.vetter, dri-devel

Am 29.03.22 um 18:02 schrieb Daniel Vetter:
> On Mon, Mar 21, 2022 at 02:58:56PM +0100, Christian König wrote:
> [SNIP]
>>   static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> index f999fdd927df..c6d02c98a19a 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
>> @@ -1163,12 +1163,6 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
>>   		*num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
>>   						      PAGE_SIZE);
>>   		vmw_bo_fence_single(bo, NULL);
>> -		if (bo->moving)
>> -			dma_fence_put(bo->moving);
>> -
>> -		return dma_resv_get_singleton(bo->base.resv,
>> -					      DMA_RESV_USAGE_KERNEL,
>> -					      &bo->moving);
> This seems to be entirely misplaced and I'm pretty sure doesn't even
> compile interim.

Mhm, removing that is correctly placed as far as I can see.

What VMWGFX does here is to update bo->moving to please TTM, but since
we now drop the bo->moving fence from TTM and always wait for all fences
with DMA_RESV_USAGE_KERNEL before allowing CPU access, that workaround
isn't necessary any more.

>>   	}
>>   
>>   	return 0;
>> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
>> index c17b2df9178b..4c7134550262 100644
>> --- a/include/drm/ttm/ttm_bo_api.h
>> +++ b/include/drm/ttm/ttm_bo_api.h
>> @@ -97,7 +97,6 @@ struct ttm_tt;
>>    * @lru: List head for the lru list.
>>    * @ddestroy: List head for the delayed destroy list.
>>    * @swap: List head for swap LRU list.
>> - * @moving: Fence set when BO is moving
>>    * @offset: The current GPU offset, which can have different meanings
>>    * depending on the memory type. For SYSTEM type memory, it should be 0.
>>    * @cur_placement: Hint of current placement.
>> @@ -150,7 +149,6 @@ struct ttm_buffer_object {
>>   	 * Members protected by a bo reservation.
>>   	 */
>>   
>> -	struct dma_fence *moving;
>>   	unsigned priority;
>>   	unsigned pin_count;
> Aside from the vmwgfx thing this looks good. With the vmwgfx patch split
> issue (I think it's just that) fixed:
>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Is it enough if I explain why we update VMWGFX in the commit message?

Thanks,
Christian.

>
>>   
>> -- 
>> 2.25.1
>>


^ permalink raw reply	[flat|nested] 73+ messages in thread
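
A minimal sketch of the access pattern the explanation above relies on;
DMA_RESV_USAGE_KERNEL comes from patch 19 of this series, and the function
name is made up for illustration:

#include <linux/dma-resv.h>
#include <linux/sched.h>
#include <drm/ttm/ttm_bo_api.h>

/*
 * Sketch only: instead of tracking a separate bo->moving fence, a CPU
 * access path waits for every kernel-internal fence (moves, clears) on
 * the reservation object before touching the buffer.
 */
static int example_wait_for_kernel_fences(struct ttm_buffer_object *bo,
					  bool interruptible)
{
	long ret;

	ret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL,
				    interruptible, MAX_SCHEDULE_TIMEOUT);

	return ret < 0 ? ret : 0;
}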

* Re: [PATCH 23/23] drm/ttm: remove bo->moving
  2022-04-05  9:25     ` Christian König
@ 2022-04-05 12:51       ` Daniel Vetter
  0 siblings, 0 replies; 73+ messages in thread
From: Daniel Vetter @ 2022-04-05 12:51 UTC (permalink / raw)
  To: Christian König; +Cc: Christian König, dri-devel

On Tue, 5 Apr 2022 at 11:25, Christian König <christian.koenig@amd.com> wrote:
>
> Am 29.03.22 um 18:02 schrieb Daniel Vetter:
> > On Mon, Mar 21, 2022 at 02:58:56PM +0100, Christian König wrote:
> > [SNIP]
> >>   static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
> >> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> >> index f999fdd927df..c6d02c98a19a 100644
> >> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> >> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
> >> @@ -1163,12 +1163,6 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
> >>              *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
> >>                                                    PAGE_SIZE);
> >>              vmw_bo_fence_single(bo, NULL);
> >> -            if (bo->moving)
> >> -                    dma_fence_put(bo->moving);
> >> -
> >> -            return dma_resv_get_singleton(bo->base.resv,
> >> -                                          DMA_RESV_USAGE_KERNEL,
> >> -                                          &bo->moving);
> > This seems to be entirely misplaced and I'm pretty sure doesn't even
> > compile interim.
>
> Mhm, removing that is correctly placed as far as I can see.
>
> What VMWGFX does here is to update bo->moving to please TTM, but since
> we now drop the bo->moving fence from TTM and always wait for all fences
> with DMA_RESV_USAGE_KERNEL before allowing CPU access that workaround
> isn't necessary any more.

Hm yeah that makes sense. Just out of paranoia it would be good if you
can get an ack from the vmwgfx folks on the previous patch that
downgrades from USAGE_WRITE to USAGE_KERNEL here, but I guess that
should be fine.

Just from reading the commit that introduced this, it looks a little bit
like the intent is actually to make any USAGE_WRITE a mandatory fence
you can never cheat out of, but maybe I got this all wrong.

>
> >>      }
> >>
> >>      return 0;
> >> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> >> index c17b2df9178b..4c7134550262 100644
> >> --- a/include/drm/ttm/ttm_bo_api.h
> >> +++ b/include/drm/ttm/ttm_bo_api.h
> >> @@ -97,7 +97,6 @@ struct ttm_tt;
> >>    * @lru: List head for the lru list.
> >>    * @ddestroy: List head for the delayed destroy list.
> >>    * @swap: List head for swap LRU list.
> >> - * @moving: Fence set when BO is moving
> >>    * @offset: The current GPU offset, which can have different meanings
> >>    * depending on the memory type. For SYSTEM type memory, it should be 0.
> >>    * @cur_placement: Hint of current placement.
> >> @@ -150,7 +149,6 @@ struct ttm_buffer_object {
> >>       * Members protected by a bo reservation.
> >>       */
> >>
> >> -    struct dma_fence *moving;
> >>      unsigned priority;
> >>      unsigned pin_count;
> > Aside from the vmwgfx thing this looks good. With the vmwgfx patch split
> > issue (I think it's just that) fixed:
> >
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> Is it enough if I explain why we update VMWGFX in the commit message?

Yeah, sounds fine to me - this patch isn't a functional change, now that
you explained it that's clear. I'm still wondering whether there's
something fishy going on, but that's on earlier patches.
-Daniel

>
> Thanks,
> Christian.
>
> >
> >>
> >> --
> >> 2.25.1
> >>
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 73+ messages in thread

end of thread, other threads:[~2022-04-05 12:51 UTC | newest]

Thread overview: 73+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-03-21 13:58 [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
2022-03-21 13:58 ` [PATCH 02/23] dma-buf: finally make the dma_resv_list private v2 Christian König
2022-03-21 13:58 ` [PATCH 03/23] dma-buf: add dma_resv_get_singleton v2 Christian König
2022-04-01  8:21   ` Christian König
2022-04-01 15:57     ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 04/23] RDMA: use dma_resv_wait() instead of extracting the fence Christian König
2022-03-21 13:58   ` Christian König
2022-03-23 13:22   ` Daniel Vetter
2022-03-23 13:22     ` Daniel Vetter
2022-03-23 16:32     ` Jason Gunthorpe
2022-03-23 16:32       ` Jason Gunthorpe
2022-03-23 17:34       ` Daniel Vetter
2022-03-23 17:34         ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 05/23] drm/etnaviv: stop using dma_resv_excl_fence Christian König
2022-03-23 15:58   ` Lucas Stach
2022-03-21 13:58 ` [Nouveau] [PATCH 06/23] drm/nouveau: " Christian König
2022-03-21 13:58   ` Christian König
2022-04-03 15:59   ` [Nouveau] " Christian König
2022-04-03 15:59     ` Christian König
2022-04-04 11:47     ` [Nouveau] " Karol Herbst
2022-04-04 11:47       ` Karol Herbst
2022-04-05  7:25       ` [Nouveau] " Christian König
2022-04-05  7:25         ` Christian König
2022-03-21 13:58 ` [PATCH 07/23] drm/vmwgfx: " Christian König
2022-03-21 14:02   ` Zack Rusin
2022-03-21 14:12     ` Christian König
2022-03-21 15:11       ` Zack Rusin
2022-03-22  7:13         ` Christian König
2022-03-22 12:40           ` Zack Rusin
2022-03-21 13:58 ` [PATCH 08/23] drm/radeon: " Christian König
2022-03-21 13:58 ` [PATCH 09/23] drm/amdgpu: use dma_resv_for_each_fence for CS workaround Christian König
2022-03-21 13:58 ` [PATCH 10/23] dma-buf: finally make dma_resv_excl_fence private v2 Christian König
2022-03-23 13:36   ` Daniel Vetter
2022-04-05  7:58     ` Christian König
2022-03-21 13:58 ` [PATCH 11/23] dma-buf: drop the DAG approach for the dma_resv object v2 Christian König
2022-03-23 13:40   ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 12/23] dma-buf/drivers: make reserving a shared slot mandatory v3 Christian König
2022-03-28 17:14   ` Daniel Vetter
2022-03-31 12:07     ` Christian König
2022-03-31 16:42       ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 13/23] drm/atomic-helper: support more than one write fence in drm_gem_plane_helper_prepare_fb Christian König
2022-03-21 13:58 ` [Nouveau] [PATCH 14/23] drm/nouveau: support more than one write fence in fenv50_wndw_prepare_fb Christian König
2022-03-21 13:58   ` Christian König
2022-03-21 13:58 ` [PATCH 15/23] drm/amdgpu: use dma_resv_get_singleton in amdgpu_pasid_free_cb Christian König
2022-03-21 13:58 ` [PATCH 16/23] dma-buf: add enum dma_resv_usage v3 Christian König
2022-03-29 15:24   ` Daniel Vetter
2022-04-04  1:13   ` Bas Nieuwenhuizen
2022-03-21 13:58 ` [PATCH 17/23] dma-buf: specify usage while adding fences to dma_resv obj v5 Christian König
2022-03-29 15:43   ` Daniel Vetter
2022-04-01 15:01     ` Christian König
2022-04-01 16:16       ` Daniel Vetter
2022-04-02 22:16   ` Bas Nieuwenhuizen
2022-04-03 17:48     ` Christian König
2022-03-21 13:58 ` [PATCH 18/23] drm/amdgpu: remove dma_resv workaround Christian König
2022-03-29 15:47   ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 19/23] dma-buf: add DMA_RESV_USAGE_KERNEL v2 Christian König
2022-03-29 15:53   ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 20/23] dma-buf: add DMA_RESV_USAGE_BOOKKEEP v2 Christian König
2022-03-29 16:06   ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 21/23] dma-buf: wait for map to complete for static attachments Christian König
2022-03-29 15:56   ` Daniel Vetter
2022-03-21 13:58 ` [PATCH 22/23] drm/i915: drop bo->moving dependency Christian König
2022-03-21 13:58   ` [Intel-gfx] " Christian König
2022-03-29 15:57   ` Daniel Vetter
2022-03-29 15:57     ` [Intel-gfx] " Daniel Vetter
2022-03-21 13:58 ` [PATCH 23/23] drm/ttm: remove bo->moving Christian König
2022-03-29 16:02   ` Daniel Vetter
2022-04-05  9:25     ` Christian König
2022-04-05 12:51       ` Daniel Vetter
2022-03-21 14:03 ` [PATCH 01/23] dma-buf: add dma_resv_replace_fences v2 Christian König
2022-03-23 13:09 ` Daniel Vetter
2022-03-23 15:55 ` Felix Kuehling
2022-03-23 15:57   ` Christian König
