dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
@ 2019-06-18 11:54 Christian König
  2019-06-18 11:54 ` [PATCH 2/6] drm/ttm: remove the backing store if no placement is given Christian König
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Christian König @ 2019-06-18 11:54 UTC (permalink / raw)
  To: intel-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On the exporter side we add optional explicit pinning callbacks. If those
callbacks are implemented the framework no longer caches sg tables and the
map/unmap callbacks are always called with the lock of the reservation object
held.

On the importer side we add an optional invalidate callback. This callback is
used by the exporter to inform the importers that their mappings should be
destroyed as soon as possible.

This allows the exporter to provide the mappings without the need to pin
the backing store.

v2: don't try to invalidate mappings when the callback is NULL,
    lock the reservation obj while using the attachments,
    add helper to set the callback
v3: move flag for invalidation support into the DMA-buf,
    use new attach_info structure to set the callback
v4: use importer_priv field instead of mangling exporter priv.
v5: drop invalidation_supported flag
v6: squash together with pin/unpin changes
v7: pin/unpin takes an attachment now
v8: nuke dma_buf_attachment_(map|unmap)_locked,
    everything is now handled backward compatible
v9: always cache when export/importer don't agree on dynamic handling
v10: minimal style cleanup

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 188 ++++++++++++++++++++++++++++++++++++--
 include/linux/dma-buf.h   | 109 ++++++++++++++++++++--
 2 files changed, 283 insertions(+), 14 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 6c15deb5d4ad..9c9c95a88655 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -531,6 +531,9 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (WARN_ON(exp_info->ops->cache_sgt_mapping && exp_info->ops->pin))
+		return ERR_PTR(-EINVAL);
+
 	if (!try_module_get(exp_info->owner))
 		return ERR_PTR(-ENOENT);
 
@@ -651,10 +654,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
 EXPORT_SYMBOL_GPL(dma_buf_put);
 
 /**
- * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
  * calls attach() of dma_buf_ops to allow device-specific attach functionality
- * @dmabuf:	[in]	buffer to attach device to.
- * @dev:	[in]	device to be attached.
+ * @dmabuf:		[in]	buffer to attach device to.
+ * @dev:		[in]	device to be attached.
+ * @importer_ops	[in]	importer operations for the attachment
+ * @importer_priv	[in]	importer private pointer for the attachment
  *
  * Returns struct dma_buf_attachment pointer for this attachment. Attachments
  * must be cleaned up by calling dma_buf_detach().
@@ -668,8 +673,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
  * accessible to @dev, and cannot be moved to a more suitable place. This is
  * indicated with the error code -EBUSY.
  */
-struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-					  struct device *dev)
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       const struct dma_buf_attach_ops *importer_ops,
+		       void *importer_priv)
 {
 	struct dma_buf_attachment *attach;
 	int ret;
@@ -683,6 +690,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 
 	attach->dev = dev;
 	attach->dmabuf = dmabuf;
+	attach->importer_ops = importer_ops;
+	attach->importer_priv = importer_priv;
 
 	mutex_lock(&dmabuf->lock);
 
@@ -691,16 +700,72 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 		if (ret)
 			goto err_attach;
 	}
+	reservation_object_lock(dmabuf->resv, NULL);
 	list_add(&attach->node, &dmabuf->attachments);
+	reservation_object_unlock(dmabuf->resv);
 
 	mutex_unlock(&dmabuf->lock);
 
+	/* When either the importer or the exporter can't handle dynamic
+	 * mappings we cache the mapping here to avoid issues with the
+	 * reservation object lock.
+	 */
+	if (dma_buf_attachment_is_dynamic(attach) !=
+	    dma_buf_is_dynamic(dmabuf)) {
+		struct sg_table *sgt;
+
+		if (dma_buf_is_dynamic(attach->dmabuf)) {
+			reservation_object_lock(attach->dmabuf->resv, NULL);
+			ret = dma_buf_pin(attach);
+			if (ret)
+				goto err_unlock;
+		}
+
+		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
+		if (!sgt)
+			sgt = ERR_PTR(-ENOMEM);
+		if (IS_ERR(sgt)) {
+			ret = PTR_ERR(sgt);
+			goto err_unpin;
+		}
+		if (dma_buf_is_dynamic(attach->dmabuf))
+			reservation_object_unlock(attach->dmabuf->resv);
+		attach->sgt = sgt;
+		attach->dir = DMA_BIDIRECTIONAL;
+	}
+
 	return attach;
 
 err_attach:
 	kfree(attach);
 	mutex_unlock(&dmabuf->lock);
 	return ERR_PTR(ret);
+
+err_unpin:
+	if (dma_buf_is_dynamic(attach->dmabuf))
+		dma_buf_unpin(attach);
+
+err_unlock:
+	if (dma_buf_is_dynamic(attach->dmabuf))
+		reservation_object_unlock(attach->dmabuf->resv);
+
+	dma_buf_detach(dmabuf, attach);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
+
+/**
+ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
+ * @dmabuf:	[in]	buffer to attach device to.
+ * @dev:	[in]	device to be attached.
+ *
+ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
+ * mapping.
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+					  struct device *dev)
+{
+	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(dma_buf_attach);
 
@@ -717,11 +782,22 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 	if (WARN_ON(!dmabuf || !attach))
 		return;
 
-	if (attach->sgt)
+	if (attach->sgt) {
+		if (dma_buf_is_dynamic(attach->dmabuf))
+			reservation_object_lock(attach->dmabuf->resv, NULL);
+
 		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
 
+		if (dma_buf_is_dynamic(attach->dmabuf)) {
+			dma_buf_unpin(attach);
+			reservation_object_unlock(attach->dmabuf->resv);
+		}
+	}
+
 	mutex_lock(&dmabuf->lock);
+	reservation_object_lock(dmabuf->resv, NULL);
 	list_del(&attach->node);
+	reservation_object_unlock(dmabuf->resv);
 	if (dmabuf->ops->detach)
 		dmabuf->ops->detach(dmabuf, attach);
 
@@ -730,6 +806,44 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 }
 EXPORT_SYMBOL_GPL(dma_buf_detach);
 
+/**
+ * dma_buf_pin - Lock down the DMA-buf
+ *
+ * @attach:	[in]	attachment which should be pinned
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
+ */
+int dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+	int ret = 0;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	if (dmabuf->ops->pin)
+		ret = dmabuf->ops->pin(attach);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dma_buf_pin);
+
+/**
+ * dma_buf_unpin - Remove lock from DMA-buf
+ *
+ * @attach:	[in]	attachment which should be unpinned
+ */
+void dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	if (dmabuf->ops->unpin)
+		dmabuf->ops->unpin(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unpin);
+
 /**
  * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
  * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
@@ -749,6 +863,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 					enum dma_data_direction direction)
 {
 	struct sg_table *sg_table;
+	int r;
 
 	might_sleep();
 
@@ -767,10 +882,38 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 		return attach->sgt;
 	}
 
+	if (dma_buf_attachment_is_dynamic(attach)) {
+		reservation_object_assert_held(attach->dmabuf->resv);
+
+		/*
+		 * Mapping a DMA-buf can trigger its invalidation, prevent
+		 * sending this event to the caller by temporary removing
+		 * this attachment from the list.
+		 */
+		list_del(&attach->node);
+
+	} else if (dma_buf_is_dynamic(attach->dmabuf)) {
+		reservation_object_lock(attach->dmabuf->resv, NULL);
+		r = dma_buf_pin(attach);
+		if (r) {
+			reservation_object_unlock(attach->dmabuf->resv);
+			return ERR_PTR(r);
+		}
+	}
+
 	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
 	if (!sg_table)
 		sg_table = ERR_PTR(-ENOMEM);
 
+	if (dma_buf_attachment_is_dynamic(attach)) {
+		list_add(&attach->node, &attach->dmabuf->attachments);
+
+	} else if (dma_buf_is_dynamic(attach->dmabuf)) {
+		if (IS_ERR(sg_table))
+			dma_buf_unpin(attach);
+		reservation_object_unlock(attach->dmabuf->resv);
+	}
+
 	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
 		attach->sgt = sg_table;
 		attach->dir = direction;
@@ -802,10 +945,41 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (attach->sgt == sg_table)
 		return;
 
+	if (dma_buf_attachment_is_dynamic(attach))
+		reservation_object_assert_held(attach->dmabuf->resv);
+	else if (dma_buf_is_dynamic(attach->dmabuf))
+		reservation_object_lock(attach->dmabuf->resv, NULL);
+
 	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
+
+	if (dma_buf_is_dynamic(attach->dmabuf) &&
+	    !dma_buf_attachment_is_dynamic(attach)) {
+		dma_buf_unpin(attach);
+		reservation_object_unlock(attach->dmabuf->resv);
+	}
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
+/**
+ * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
+ *
+ * @dmabuf:	[in]	buffer which mappings should be invalidated
+ *
+ * Informs all attachmenst that they need to destroy and recreated all their
+ * mappings.
+ */
+void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
+{
+	struct dma_buf_attachment *attach;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	list_for_each_entry(attach, &dmabuf->attachments, node)
+		if (attach->importer_ops && attach->importer_ops->invalidate)
+			attach->importer_ops->invalidate(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
+
 /**
  * DOC: cpu access
  *
@@ -1225,10 +1399,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		seq_puts(s, "\tAttached Devices:\n");
 		attach_count = 0;
 
+		reservation_object_lock(buf_obj->resv, NULL);
 		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
 			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
 			attach_count++;
 		}
+		reservation_object_unlock(buf_obj->resv);
 
 		seq_printf(s, "Total %d devices attached\n\n",
 				attach_count);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 01ad5b942a6f..f9c96bf56bc8 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -92,14 +92,40 @@ struct dma_buf_ops {
 	 */
 	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
 
+	/**
+	 * @pin:
+	 *
+	 * This is called by dma_buf_pin and lets the exporter know that the
+	 * DMA-buf can't be moved any more.
+	 *
+	 * This is called with the dmabuf->resv object locked.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success, negative error code on failure.
+	 */
+	int (*pin)(struct dma_buf_attachment *attach);
+
+	/**
+	 * @unpin:
+	 *
+	 * This is called by dma_buf_unpin and lets the exporter know that the
+	 * DMA-buf can be moved again.
+	 *
+	 * This is called with the dmabuf->resv object locked.
+	 *
+	 * This callback is optional.
+	 */
+	void (*unpin)(struct dma_buf_attachment *attach);
+
 	/**
 	 * @map_dma_buf:
 	 *
 	 * This is called by dma_buf_map_attachment() and is used to map a
 	 * shared &dma_buf into device address space, and it is mandatory. It
-	 * can only be called if @attach has been called successfully. This
-	 * essentially pins the DMA buffer into place, and it cannot be moved
-	 * any more
+	 * can only be called if @attach has been called successfully.
 	 *
 	 * This call may sleep, e.g. when the backing storage first needs to be
 	 * allocated, or moved to a location suitable for all currently attached
@@ -120,6 +146,9 @@ struct dma_buf_ops {
 	 * any other kind of sharing that the exporter might wish to make
 	 * available to buffer-users.
 	 *
+	 * This is always called with the dmabuf->resv object locked when
+	 * the pin/unpin callbacks are implemented.
+	 *
 	 * Returns:
 	 *
 	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
@@ -137,9 +166,6 @@ struct dma_buf_ops {
 	 *
 	 * This is called by dma_buf_unmap_attachment() and should unmap and
 	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
-	 * It should also unpin the backing storage if this is the last mapping
-	 * of the DMA buffer, it the exporter supports backing storage
-	 * migration.
 	 */
 	void (*unmap_dma_buf)(struct dma_buf_attachment *,
 			      struct sg_table *,
@@ -330,6 +356,35 @@ struct dma_buf {
 	} cb_excl, cb_shared;
 };
 
+/**
+ * struct dma_buf_attach_ops - importer operations for an attachment
+ * @invalidate: [optional] invalidate all mappings made using this attachment.
+ *
+ * Attachment operations implemented by the importer.
+ */
+struct dma_buf_attach_ops {
+	/**
+	 * @invalidate:
+	 *
+	 * If the invalidate callback is provided the framework can avoid
+	 * pinning the backing store while mappings exists.
+	 *
+	 * This callback is called with the lock of the reservation object
+	 * associated with the dma_buf held and the mapping function must be
+	 * called with this lock held as well. This makes sure that no mapping
+	 * is created concurrently with an ongoing invalidation.
+	 *
+	 * After the callback all existing mappings are still valid until all
+	 * fences in the dma_bufs reservation object are signaled. After getting an
+	 * invalidation callback all mappings should be destroyed by the importer using
+	 * the normal dma_buf_unmap_attachment() function as soon as possible.
+	 *
+	 * New mappings can be created immediately, but can't be used before the
+	 * exclusive fence in the dma_bufs reservation object is signaled.
+	 */
+	void (*invalidate)(struct dma_buf_attachment *attach);
+};
+
 /**
  * struct dma_buf_attachment - holds device-buffer attachment data
  * @dmabuf: buffer for this attachment.
@@ -338,6 +393,8 @@ struct dma_buf {
  * @sgt: cached mapping.
  * @dir: direction of cached mapping.
  * @priv: exporter specific attachment data.
+ * @importer_ops: importer operations for this attachment.
+ * @importer_priv: importer specific attachment data.
  *
  * This structure holds the attachment information between the dma_buf buffer
  * and its user device(s). The list contains one attachment struct per device
@@ -355,6 +412,9 @@ struct dma_buf_attachment {
 	struct sg_table *sgt;
 	enum dma_data_direction dir;
 	void *priv;
+
+	const struct dma_buf_attach_ops *importer_ops;
+	void *importer_priv;
 };
 
 /**
@@ -405,10 +465,42 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
 	get_file(dmabuf->file);
 }
 
+/**
+ * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
+ * @dmabuf: the DMA-buf to check
+ *
+ * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
+ * for each attachment. False if only a single static sg table should be used.
+ */
+static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
+{
+	return !!dmabuf->ops->pin;
+}
+
+/**
+ * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic
+ * mappinsg
+ * @attach: the DMA-buf attachment to check
+ *
+ * Returns true if a DMA-buf importer wants to use dynamic sg table mappings and
+ * calls the map/unmap functions with the reservation object locked.
+ */
+static inline bool
+dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
+{
+	return attach->importer_ops && attach->importer_ops->invalidate;
+}
+
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev);
+					  struct device *dev);
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       const struct dma_buf_attach_ops *importer_ops,
+		       void *importer_priv);
 void dma_buf_detach(struct dma_buf *dmabuf,
-				struct dma_buf_attachment *dmabuf_attach);
+		    struct dma_buf_attachment *attach);
+int dma_buf_pin(struct dma_buf_attachment *attach);
+void dma_buf_unpin(struct dma_buf_attachment *attach);
 
 struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
 
@@ -420,6 +512,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/6] drm/ttm: remove the backing store if no placement is given
  2019-06-18 11:54 [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Christian König
@ 2019-06-18 11:54 ` Christian König
  2019-06-18 11:54 ` [PATCH 3/6] drm/ttm: use the parent resv for ghost objects v2 Christian König
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2019-06-18 11:54 UTC (permalink / raw)
  To: intel-gfx, dri-devel, amd-gfx

Pipeline removal of the BOs backing store when no placement is given
during validation.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index c7de667d482a..682a1a035b35 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1240,6 +1240,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 	uint32_t new_flags;
 
 	reservation_object_assert_held(bo->resv);
+
+	/*
+	 * Remove the backing store if no placement is given.
+	 */
+	if (!placement->num_placement && !placement->num_busy_placement) {
+		ret = ttm_bo_pipeline_gutting(bo);
+		if (ret)
+			return ret;
+
+		return ttm_tt_create(bo, false);
+	}
+
 	/*
 	 * Check whether we need to move buffer.
 	 */
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/6] drm/ttm: use the parent resv for ghost objects v2
  2019-06-18 11:54 [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Christian König
  2019-06-18 11:54 ` [PATCH 2/6] drm/ttm: remove the backing store if no placement is given Christian König
@ 2019-06-18 11:54 ` Christian König
       [not found] ` <20190618115455.1253-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2019-06-18 11:54 UTC (permalink / raw)
  To: intel-gfx, dri-devel, amd-gfx

This way we can even pipeline imported BO evictions.

v2: Limit this to only cases when the parent object uses a separate
    reservation object as well. This fixes another OOM problem.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 895d77d799e4..95f47d685921 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -517,9 +517,11 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	kref_init(&fbo->base.kref);
 	fbo->base.destroy = &ttm_transfered_destroy;
 	fbo->base.acc_size = 0;
-	fbo->base.resv = &fbo->base.ttm_resv;
-	reservation_object_init(fbo->base.resv);
-	ret = reservation_object_trylock(fbo->base.resv);
+	if (bo->resv == &bo->ttm_resv)
+		fbo->base.resv = &fbo->base.ttm_resv;
+
+	reservation_object_init(&fbo->base.ttm_resv);
+	ret = reservation_object_trylock(&fbo->base.ttm_resv);
 	WARN_ON(!ret);
 
 	*new_obj = &fbo->base;
@@ -716,7 +718,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+		reservation_object_add_excl_fence(&ghost_obj->ttm_resv, fence);
 
 		/**
 		 * If we're not moving to fixed memory, the TTM object
@@ -729,7 +731,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		else
 			bo->ttm = NULL;
 
-		ttm_bo_unreserve(ghost_obj);
+		reservation_object_unlock(&ghost_obj->ttm_resv);
 		ttm_bo_put(ghost_obj);
 	}
 
@@ -772,7 +774,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+		reservation_object_add_excl_fence(&ghost_obj->ttm_resv, fence);
 
 		/**
 		 * If we're not moving to fixed memory, the TTM object
@@ -785,7 +787,7 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 		else
 			bo->ttm = NULL;
 
-		ttm_bo_unreserve(ghost_obj);
+		reservation_object_unlock(&ghost_obj->ttm_resv);
 		ttm_bo_put(ghost_obj);
 
 	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
@@ -841,7 +843,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
 	if (ret)
 		return ret;
 
-	ret = reservation_object_copy_fences(ghost->resv, bo->resv);
+	ret = reservation_object_copy_fences(&ghost->ttm_resv, bo->resv);
 	/* Last resort, wait for the BO to be idle when we are OOM */
 	if (ret)
 		ttm_bo_wait(bo, false, false);
@@ -850,7 +852,7 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
 	bo->mem.mem_type = TTM_PL_SYSTEM;
 	bo->ttm = NULL;
 
-	ttm_bo_unreserve(ghost);
+	reservation_object_unlock(&ghost->ttm_resv);
 	ttm_bo_put(ghost);
 
 	return 0;
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 4/6] drm/amdgpu: use allowed_domains for exported DMA-bufs
       [not found] ` <20190618115455.1253-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-06-18 11:54   ` Christian König
  0 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2019-06-18 11:54 UTC (permalink / raw)
  To: intel-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Avoid that we ping/pong the buffers when we stop to pin DMA-buf
exports by using the allowed domains for exported buffers.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc63707e426f..0da512341194 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -28,6 +28,7 @@
 #include <linux/file.h>
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
+#include <linux/dma-buf.h>
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
@@ -414,7 +415,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold) {
+	if (p->bytes_moved < p->bytes_moved_threshold &&
+	    (!bo->gem_base.dma_buf ||
+	    list_empty(&bo->gem_base.dma_buf->attachments))) {
 		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v6
  2019-06-18 11:54 [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Christian König
                   ` (2 preceding siblings ...)
       [not found] ` <20190618115455.1253-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-06-18 11:54 ` Christian König
  2019-06-18 11:54 ` [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v6 Christian König
  2019-06-21  9:20 ` [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Daniel Vetter
  5 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2019-06-18 11:54 UTC (permalink / raw)
  To: intel-gfx, dri-devel, amd-gfx

The caching of SGT's is actually quite harmful and should probably removed
altogether when all drivers are audited.

Start by providing a separate DMA-buf export implementation in amdgpu.
This is also a prerequisite of unpinned DMA-buf handling.

v2: fix unintended recursion, remove debugging leftovers
v3: split out from unpinned DMA-buf work
v4: rebase on top of new no_sgt_cache flag
v5: fix some warnings by including amdgpu_dma_buf.h
v6: fix locking for non amdgpu exports

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 210 +++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   5 +
 4 files changed, 139 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 489041df1f45..579e33b31f2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -34,26 +34,11 @@
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_gem.h"
+#include "amdgpu_dma_buf.h"
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 #include <linux/dma-fence-array.h>
 
-/**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	int npages = bo->tbo.num_pages;
-
-	return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
 /**
  * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
  * @obj: GEM BO
@@ -179,92 +164,163 @@ __reservation_object_make_exclusive(struct reservation_object *obj)
 }
 
 /**
- * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+				 struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	int r;
+
+	if (attach->dev->driver == adev->dev->driver)
+		return 0;
+
+	r = amdgpu_bo_reserve(bo, false);
+	if (unlikely(r != 0))
+		return r;
+
+	/*
+	 * We only create shared fences for internal use, but importers
+	 * of the dmabuf rely on exclusive fences for implicitly
+	 * tracking write hazards. As any of the current fences may
+	 * correspond to a write, we need to convert all existing
+	 * fences on the reservation object into a single exclusive
+	 * fence.
+	 */
+	r = __reservation_object_make_exclusive(bo->tbo.resv);
+	if (r)
+		return r;
+
+	bo->prime_shared_count++;
+	amdgpu_bo_unreserve(bo);
+	return 0;
+}
+
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+		bo->prime_shared_count--;
+}
+
+/**
+ * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
+ *
+ * @attach: attachment to pin down
+ *
+ * Pin the BO which is backing the DMA-buf so that it can't move any more.
+ */
+static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+	/* pin buffer into GTT */
+	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+}
+
+/**
+ * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
+ *
+ * @attach: attachment to unpin
+ *
+ * Unpin a previously pinned BO to make it movable again.
+ */
+static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+	amdgpu_bo_unpin(bo);
+}
+
+/**
+ * amdgpu_dma_buf_map_dma_buf - &dma_buf_ops.map_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @dir: DMA direction
  *
  * Makes sure that the shared DMA buffer can be accessed by the target device.
  * For now, simply pins it to the GTT domain, where it should be accessible by
  * all DMA devices.
  *
  * Returns:
- * 0 on success or a negative error code on failure.
+ * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
+ * code.
  */
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
-				     struct dma_buf_attachment *attach)
+static struct sg_table *
+amdgpu_dma_buf_map_dma_buf(struct dma_buf_attachment *attach,
+			   enum dma_data_direction dir)
 {
+	struct dma_buf *dma_buf = attach->dmabuf;
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct sg_table *sgt;
 	long r;
 
-	r = drm_gem_map_attach(dma_buf, attach);
-	if (r)
-		return r;
+	if (!bo->pin_count) {
+		/* move buffer into GTT */
+		struct ttm_operation_ctx ctx = { false, false };
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (unlikely(r != 0))
-		goto error_detach;
-
-
-	if (attach->dev->driver != adev->dev->driver) {
-		/*
-		 * We only create shared fences for internal use, but importers
-		 * of the dmabuf rely on exclusive fences for implicitly
-		 * tracking write hazards. As any of the current fences may
-		 * correspond to a write, we need to convert all existing
-		 * fences on the reservation object into a single exclusive
-		 * fence.
-		 */
-		r = __reservation_object_make_exclusive(bo->tbo.resv);
+		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		if (r)
-			goto error_unreserve;
+			return ERR_PTR(r);
 	}
 
-	/* pin buffer into GTT */
-	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
-	if (r)
-		goto error_unreserve;
+	sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+	if (IS_ERR(sgt))
+		return sgt;
 
-	if (attach->dev->driver != adev->dev->driver)
-		bo->prime_shared_count++;
+	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			      DMA_ATTR_SKIP_CPU_SYNC))
+		goto error_free;
 
-error_unreserve:
-	amdgpu_bo_unreserve(bo);
+	return sgt;
 
-error_detach:
-	if (r)
-		drm_gem_map_detach(dma_buf, attach);
-	return r;
+error_free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
- * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_gem_unmap_dma_buf - &dma_buf_ops.unmap_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
  *
  * This is called when a shared DMA buffer no longer needs to be accessible by
  * another device. For now, simply unpins the buffer from GTT.
  */
-static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
-				      struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					 struct sg_table *sgt,
+					 enum dma_data_direction dir)
 {
-	struct drm_gem_object *obj = dma_buf->priv;
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	int ret = 0;
-
-	ret = amdgpu_bo_reserve(bo, true);
-	if (unlikely(ret != 0))
-		goto error;
+	if (!sgt)
+		return;
 
-	amdgpu_bo_unpin(bo);
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-		bo->prime_shared_count--;
-	amdgpu_bo_unreserve(bo);
-
-error:
-	drm_gem_map_detach(dma_buf, attach);
+	dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+	sg_free_table(sgt);
+	kfree(sgt);
 }
 
 /**
@@ -322,10 +378,12 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 }
 
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
-	.attach = amdgpu_dma_buf_map_attach,
-	.detach = amdgpu_dma_buf_map_detach,
-	.map_dma_buf = drm_gem_map_dma_buf,
-	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.attach = amdgpu_dma_buf_attach,
+	.detach = amdgpu_dma_buf_detach,
+	.pin = amdgpu_dma_buf_pin,
+	.unpin = amdgpu_dma_buf_unpin,
+	.map_dma_buf = amdgpu_dma_buf_map_dma_buf,
+	.unmap_dma_buf = amdgpu_dma_buf_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
 	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index c7056cbe8685..f1522292814c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,7 +25,6 @@
 
 #include <drm/drm_gem.h>
 
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
 struct drm_gem_object *
 amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 				 struct dma_buf_attachment *attach,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0a577a389024..5f3b7cab8390 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1334,7 +1334,6 @@ static struct drm_driver kms_driver = {
 	.gem_prime_export = amdgpu_gem_prime_export,
 	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
-	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
 	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = amdgpu_gem_prime_vmap,
 	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 16f96f2e3671..7843b3c6ca54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -31,6 +31,7 @@
  */
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/dma-buf.h>
 
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_cache.h>
@@ -1194,6 +1195,10 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 
 	amdgpu_bo_kunmap(abo);
 
+	if (abo->gem_base.dma_buf && !abo->gem_base.import_attach &&
+	    bo->mem.mem_type != TTM_PL_SYSTEM)
+		dma_buf_invalidate_mappings(abo->gem_base.dma_buf);
+
 	/* remember the eviction */
 	if (evict)
 		atomic64_inc(&adev->num_evictions);
-- 
2.17.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v6
  2019-06-18 11:54 [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Christian König
                   ` (3 preceding siblings ...)
  2019-06-18 11:54 ` [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v6 Christian König
@ 2019-06-18 11:54 ` Christian König
  2019-06-21  9:20 ` [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Daniel Vetter
  5 siblings, 0 replies; 17+ messages in thread
From: Christian König @ 2019-06-18 11:54 UTC (permalink / raw)
  To: intel-gfx, dri-devel, amd-gfx

Instead of relying on the DRM functions just implement our own import
functions. This prepares support for taking care of unpinned DMA-buf.

v2: enable for all exporters, not just amdgpu, fix invalidation
    handling, lock reservation object while setting callback
v3: change to new dma_buf attach interface
v4: split out from unpinned DMA-buf work
v5: rebased and cleanup on new DMA-buf interface
v6: squash with invalidation callback change,
    stop using _(map|unmap)_locked

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 65 ++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |  4 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 32 ++++++++--
 5 files changed, 83 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 579e33b31f2d..31511d7de8a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -424,31 +424,28 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 }
 
 /**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
  * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
  *
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
  *
  * Returns:
  * A new GEM BO of the given DRM device, representing the memory
  * described by the given DMA-buf attachment and scatter/gather table.
  */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 {
-	struct reservation_object *resv = attach->dmabuf->resv;
+	struct reservation_object *resv = dma_buf->resv;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
 	int ret;
 
 	memset(&bp, 0, sizeof(bp));
-	bp.size = attach->dmabuf->size;
+	bp.size = dma_buf->size;
 	bp.byte_align = PAGE_SIZE;
 	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
 	bp.flags = 0;
@@ -459,11 +456,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret)
 		goto error;
 
-	bo->tbo.sg = sg;
-	bo->tbo.ttm->sg = sg;
 	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
 		bo->prime_shared_count = 1;
 
 	ww_mutex_unlock(&resv->lock);
@@ -474,6 +469,32 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	return ERR_PTR(ret);
 }
 
+/**
+ * amdgpu_gem_prime_invalidate_mappings - &attach.invalidate implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that the we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_gem_prime_invalidate_mappings(struct dma_buf_attachment *attach)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	struct drm_gem_object *obj = attach->importer_priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct ttm_placement placement = {};
+	int r;
+
+	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+	if (r)
+		DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
+}
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+	.invalidate = amdgpu_gem_prime_invalidate_mappings
+};
+
 /**
  * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
  * @dev: DRM device
@@ -488,6 +509,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 					    struct dma_buf *dma_buf)
 {
+	struct dma_buf_attachment *attach;
 	struct drm_gem_object *obj;
 
 	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -502,5 +524,18 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 		}
 	}
 
-	return drm_gem_prime_import(dev, dma_buf);
+	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+					&amdgpu_dma_buf_attach_ops, obj);
+	if (IS_ERR(attach)) {
+		drm_gem_object_put(obj);
+		return ERR_CAST(attach);
+	}
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index f1522292814c..2765413770b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,10 +25,6 @@
 
 #include <drm/drm_gem.h>
 
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg);
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 5f3b7cab8390..76d445916c9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1334,7 +1334,6 @@ static struct drm_driver kms_driver = {
 	.gem_prime_export = amdgpu_gem_prime_export,
 	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
-	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = amdgpu_gem_prime_vmap,
 	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
 	.gem_prime_mmap = amdgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7843b3c6ca54..99bcf6e710a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -850,6 +850,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		return 0;
 	}
 
+	if (bo->gem_base.import_attach)
+		dma_buf_pin(bo->gem_base.import_attach);
+
 	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	/* force to pin into visible video ram */
 	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
@@ -933,6 +936,9 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 
 	amdgpu_bo_subtract_pin_size(bo);
 
+	if (bo->gem_base.import_attach)
+		dma_buf_unpin(bo->gem_base.import_attach);
+
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		bo->placements[i].lpfn = 0;
 		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d81bebf76310..c89fa58a7914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -39,6 +39,7 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swiotlb.h>
+#include <linux/dma-buf.h>
 
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
@@ -711,6 +712,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
  */
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
+	struct drm_gem_object	*gobj;
 	u64			offset;
 	uint64_t		userptr;
 	struct task_struct	*usertask;
@@ -1198,6 +1200,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 		return NULL;
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
+	gtt->gobj = &ttm_to_amdgpu_bo(bo)->gem_base;
 
 	/* allocate space for the uninitialized page entries */
 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1218,7 +1221,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
 	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
 	if (gtt && gtt->userptr) {
@@ -1231,7 +1233,19 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 		return 0;
 	}
 
-	if (slave && ttm->sg) {
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+		if (!ttm->sg) {
+			struct dma_buf_attachment *attach;
+			struct sg_table *sgt;
+
+			attach = gtt->gobj->import_attach;
+			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+			if (IS_ERR(sgt))
+				return PTR_ERR(sgt);
+
+			ttm->sg = sgt;
+		}
+
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address,
 						 ttm->num_pages);
@@ -1258,9 +1272,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
  */
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+	struct amdgpu_device *adev;
 
 	if (gtt && gtt->userptr) {
 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1269,7 +1282,16 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 		return;
 	}
 
-	if (slave)
+	if (ttm->sg && gtt->gobj->import_attach) {
+		struct dma_buf_attachment *attach;
+
+		attach = gtt->gobj->import_attach;
+		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
+		ttm->sg = NULL;
+		return;
+	}
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
 		return;
 
 	adev = amdgpu_ttm_adev(ttm->bdev);
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
  2019-06-18 11:54 [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Christian König
                   ` (4 preceding siblings ...)
  2019-06-18 11:54 ` [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v6 Christian König
@ 2019-06-21  9:20 ` Daniel Vetter
       [not found]   ` <20190621092022.GB12905-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  5 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2019-06-21  9:20 UTC (permalink / raw)
  To: Christian König; +Cc: intel-gfx, amd-gfx, dri-devel

On Tue, Jun 18, 2019 at 01:54:50PM +0200, Christian König wrote:
> On the exporter side we add optional explicit pinning callbacks. If those
> callbacks are implemented the framework no longer caches sg tables and the
> map/unmap callbacks are always called with the lock of the reservation object
> held.
> 
> On the importer side we add an optional invalidate callback. This callback is
> used by the exporter to inform the importers that their mappings should be
> destroyed as soon as possible.
> 
> This allows the exporter to provide the mappings without the need to pin
> the backing store.
> 
> v2: don't try to invalidate mappings when the callback is NULL,
>     lock the reservation obj while using the attachments,
>     add helper to set the callback
> v3: move flag for invalidation support into the DMA-buf,
>     use new attach_info structure to set the callback
> v4: use importer_priv field instead of mangling exporter priv.
> v5: drop invalidation_supported flag
> v6: squash together with pin/unpin changes
> v7: pin/unpin takes an attachment now
> v8: nuke dma_buf_attachment_(map|unmap)_locked,
>     everything is now handled backward compatible
> v9: always cache when export/importer don't agree on dynamic handling
> v10: minimal style cleanup
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-buf.c | 188 ++++++++++++++++++++++++++++++++++++--
>  include/linux/dma-buf.h   | 109 ++++++++++++++++++++--
>  2 files changed, 283 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 6c15deb5d4ad..9c9c95a88655 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -531,6 +531,9 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
>  		return ERR_PTR(-EINVAL);
>  	}
>  
> +	if (WARN_ON(exp_info->ops->cache_sgt_mapping && exp_info->ops->pin))
> +		return ERR_PTR(-EINVAL);
> +
>  	if (!try_module_get(exp_info->owner))
>  		return ERR_PTR(-ENOENT);
>  
> @@ -651,10 +654,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
>  EXPORT_SYMBOL_GPL(dma_buf_put);
>  
>  /**
> - * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
> + * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
>   * calls attach() of dma_buf_ops to allow device-specific attach functionality
> - * @dmabuf:	[in]	buffer to attach device to.
> - * @dev:	[in]	device to be attached.
> + * @dmabuf:		[in]	buffer to attach device to.
> + * @dev:		[in]	device to be attached.
> + * @importer_ops	[in]	importer operations for the attachment
> + * @importer_priv	[in]	importer private pointer for the attachment
>   *
>   * Returns struct dma_buf_attachment pointer for this attachment. Attachments
>   * must be cleaned up by calling dma_buf_detach().
> @@ -668,8 +673,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
>   * accessible to @dev, and cannot be moved to a more suitable place. This is
>   * indicated with the error code -EBUSY.
>   */
> -struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> -					  struct device *dev)
> +struct dma_buf_attachment *
> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> +		       const struct dma_buf_attach_ops *importer_ops,
> +		       void *importer_priv)
>  {
>  	struct dma_buf_attachment *attach;
>  	int ret;
> @@ -683,6 +690,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  
>  	attach->dev = dev;
>  	attach->dmabuf = dmabuf;
> +	attach->importer_ops = importer_ops;
> +	attach->importer_priv = importer_priv;
>  
>  	mutex_lock(&dmabuf->lock);
>  
> @@ -691,16 +700,72 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  		if (ret)
>  			goto err_attach;
>  	}
> +	reservation_object_lock(dmabuf->resv, NULL);
>  	list_add(&attach->node, &dmabuf->attachments);
> +	reservation_object_unlock(dmabuf->resv);
>  
>  	mutex_unlock(&dmabuf->lock);
>  
> +	/* When either the importer or the exporter can't handle dynamic
> +	 * mappings we cache the mapping here to avoid issues with the
> +	 * reservation object lock.
> +	 */
> +	if (dma_buf_attachment_is_dynamic(attach) !=
> +	    dma_buf_is_dynamic(dmabuf)) {
> +		struct sg_table *sgt;
> +
> +		if (dma_buf_is_dynamic(attach->dmabuf)) {
> +			reservation_object_lock(attach->dmabuf->resv, NULL);
> +			ret = dma_buf_pin(attach);
> +			if (ret)
> +				goto err_unlock;
> +		}
> +
> +		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
> +		if (!sgt)
> +			sgt = ERR_PTR(-ENOMEM);
> +		if (IS_ERR(sgt)) {
> +			ret = PTR_ERR(sgt);
> +			goto err_unpin;
> +		}
> +		if (dma_buf_is_dynamic(attach->dmabuf))
> +			reservation_object_unlock(attach->dmabuf->resv);
> +		attach->sgt = sgt;
> +		attach->dir = DMA_BIDIRECTIONAL;
> +	}
> +
>  	return attach;
>  
>  err_attach:
>  	kfree(attach);
>  	mutex_unlock(&dmabuf->lock);
>  	return ERR_PTR(ret);
> +
> +err_unpin:
> +	if (dma_buf_is_dynamic(attach->dmabuf))
> +		dma_buf_unpin(attach);
> +
> +err_unlock:
> +	if (dma_buf_is_dynamic(attach->dmabuf))
> +		reservation_object_unlock(attach->dmabuf->resv);
> +
> +	dma_buf_detach(dmabuf, attach);
> +	return ERR_PTR(ret);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
> +
> +/**
> + * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
> + * @dmabuf:	[in]	buffer to attach device to.
> + * @dev:	[in]	device to be attached.
> + *
> + * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
> + * mapping.
> + */
> +struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> +					  struct device *dev)
> +{
> +	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_attach);
>  
> @@ -717,11 +782,22 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  	if (WARN_ON(!dmabuf || !attach))
>  		return;
>  
> -	if (attach->sgt)
> +	if (attach->sgt) {
> +		if (dma_buf_is_dynamic(attach->dmabuf))
> +			reservation_object_lock(attach->dmabuf->resv, NULL);
> +
>  		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
>  
> +		if (dma_buf_is_dynamic(attach->dmabuf)) {
> +			dma_buf_unpin(attach);
> +			reservation_object_unlock(attach->dmabuf->resv);
> +		}
> +	}
> +
>  	mutex_lock(&dmabuf->lock);
> +	reservation_object_lock(dmabuf->resv, NULL);
>  	list_del(&attach->node);
> +	reservation_object_unlock(dmabuf->resv);
>  	if (dmabuf->ops->detach)
>  		dmabuf->ops->detach(dmabuf, attach);
>  
> @@ -730,6 +806,44 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_detach);
>  
> +/**
> + * dma_buf_pin - Lock down the DMA-buf
> + *
> + * @attach:	[in]	attachment which should be pinned
> + *
> + * Returns:
> + * 0 on success, negative error code on failure.
> + */
> +int dma_buf_pin(struct dma_buf_attachment *attach)
> +{
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +	int ret = 0;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	if (dmabuf->ops->pin)
> +		ret = dmabuf->ops->pin(attach);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_pin);
> +
> +/**
> + * dma_buf_unpin - Remove lock from DMA-buf
> + *
> + * @attach:	[in]	attachment which should be unpinned
> + */
> +void dma_buf_unpin(struct dma_buf_attachment *attach)
> +{
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	if (dmabuf->ops->unpin)
> +		dmabuf->ops->unpin(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_unpin);
> +
>  /**
>   * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
>   * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
> @@ -749,6 +863,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>  					enum dma_data_direction direction)
>  {
>  	struct sg_table *sg_table;
> +	int r;
>  
>  	might_sleep();
>  
> @@ -767,10 +882,38 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>  		return attach->sgt;
>  	}
>  
> +	if (dma_buf_attachment_is_dynamic(attach)) {
> +		reservation_object_assert_held(attach->dmabuf->resv);
> +
> +		/*
> +		 * Mapping a DMA-buf can trigger its invalidation, prevent
> +		 * sending this event to the caller by temporary removing
> +		 * this attachment from the list.
> +		 */
> +		list_del(&attach->node);

I'm still hung up about this, that still feels like leaking random ttm
implementation details into the dma-buf interfaces. And it's asymmetric:

- When acquiring a buffer mapping (whether p2p or system memory sg or
  whatever) we always have to wait for pending fences before we can access
  the buffer. At least for full dynamic dma-buf access.

- Same is true when dropping a mapping: We could drop the mapping
  immediately, but only actually release it when that fence has signalled.
  Then this hack here wouldn't be necessary.

It feels a bit like this is just an artifact of how ttm currently does bo
moves with the shadow bo. There's other ways to fix that, you could just
have a memory manager reservation of a given range or whatever and a
release fence from when it's actually good to use.

Imo the below semantics would be much cleaner:

- invalidate may add new fences
- invalidate _must_ unmap its mappings
- an unmap must wait for current fences before the mapping can be
  released.

Imo there's no reason why unmap is special, and the only thing where we
don't use fences to gate access to resources/memory when it's in the
process of getting moved around.

btw this is like the 2nd or 3rd time I'm typing this, haven't seen your
thoughts on this yet.

Thanks, Daniel

> +
> +	} else if (dma_buf_is_dynamic(attach->dmabuf)) {
> +		reservation_object_lock(attach->dmabuf->resv, NULL);
> +		r = dma_buf_pin(attach);
> +		if (r) {
> +			reservation_object_unlock(attach->dmabuf->resv);
> +			return ERR_PTR(r);
> +		}
> +	}
> +
>  	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
>  	if (!sg_table)
>  		sg_table = ERR_PTR(-ENOMEM);
>  
> +	if (dma_buf_attachment_is_dynamic(attach)) {
> +		list_add(&attach->node, &attach->dmabuf->attachments);
> +
> +	} else if (dma_buf_is_dynamic(attach->dmabuf)) {
> +		if (IS_ERR(sg_table))
> +			dma_buf_unpin(attach);
> +		reservation_object_unlock(attach->dmabuf->resv);
> +	}
> +
>  	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
>  		attach->sgt = sg_table;
>  		attach->dir = direction;
> @@ -802,10 +945,41 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>  	if (attach->sgt == sg_table)
>  		return;
>  
> +	if (dma_buf_attachment_is_dynamic(attach))
> +		reservation_object_assert_held(attach->dmabuf->resv);
> +	else if (dma_buf_is_dynamic(attach->dmabuf))
> +		reservation_object_lock(attach->dmabuf->resv, NULL);
> +
>  	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
> +
> +	if (dma_buf_is_dynamic(attach->dmabuf) &&
> +	    !dma_buf_attachment_is_dynamic(attach)) {
> +		dma_buf_unpin(attach);
> +		reservation_object_unlock(attach->dmabuf->resv);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>  
> +/**
> + * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
> + *
> + * @dmabuf:	[in]	buffer which mappings should be invalidated
> + *
> + * Informs all attachmenst that they need to destroy and recreated all their
> + * mappings.
> + */
> +void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
> +{
> +	struct dma_buf_attachment *attach;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	list_for_each_entry(attach, &dmabuf->attachments, node)
> +		if (attach->importer_ops && attach->importer_ops->invalidate)
> +			attach->importer_ops->invalidate(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
> +
>  /**
>   * DOC: cpu access
>   *
> @@ -1225,10 +1399,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
>  		seq_puts(s, "\tAttached Devices:\n");
>  		attach_count = 0;
>  
> +		reservation_object_lock(buf_obj->resv, NULL);
>  		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
>  			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
>  			attach_count++;
>  		}
> +		reservation_object_unlock(buf_obj->resv);
>  
>  		seq_printf(s, "Total %d devices attached\n\n",
>  				attach_count);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 01ad5b942a6f..f9c96bf56bc8 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -92,14 +92,40 @@ struct dma_buf_ops {
>  	 */
>  	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
>  
> +	/**
> +	 * @pin:
> +	 *
> +	 * This is called by dma_buf_pin and lets the exporter know that the
> +	 * DMA-buf can't be moved any more.
> +	 *
> +	 * This is called with the dmabuf->resv object locked.
> +	 *
> +	 * This callback is optional.
> +	 *
> +	 * Returns:
> +	 *
> +	 * 0 on success, negative error code on failure.
> +	 */
> +	int (*pin)(struct dma_buf_attachment *attach);
> +
> +	/**
> +	 * @unpin:
> +	 *
> +	 * This is called by dma_buf_unpin and lets the exporter know that the
> +	 * DMA-buf can be moved again.
> +	 *
> +	 * This is called with the dmabuf->resv object locked.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	void (*unpin)(struct dma_buf_attachment *attach);
> +
>  	/**
>  	 * @map_dma_buf:
>  	 *
>  	 * This is called by dma_buf_map_attachment() and is used to map a
>  	 * shared &dma_buf into device address space, and it is mandatory. It
> -	 * can only be called if @attach has been called successfully. This
> -	 * essentially pins the DMA buffer into place, and it cannot be moved
> -	 * any more
> +	 * can only be called if @attach has been called successfully.
>  	 *
>  	 * This call may sleep, e.g. when the backing storage first needs to be
>  	 * allocated, or moved to a location suitable for all currently attached
> @@ -120,6 +146,9 @@ struct dma_buf_ops {
>  	 * any other kind of sharing that the exporter might wish to make
>  	 * available to buffer-users.
>  	 *
> +	 * This is always called with the dmabuf->resv object locked when
> +	 * the pin/unpin callbacks are implemented.
> +	 *
>  	 * Returns:
>  	 *
>  	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
> @@ -137,9 +166,6 @@ struct dma_buf_ops {
>  	 *
>  	 * This is called by dma_buf_unmap_attachment() and should unmap and
>  	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
> -	 * It should also unpin the backing storage if this is the last mapping
> -	 * of the DMA buffer, it the exporter supports backing storage
> -	 * migration.
>  	 */
>  	void (*unmap_dma_buf)(struct dma_buf_attachment *,
>  			      struct sg_table *,
> @@ -330,6 +356,35 @@ struct dma_buf {
>  	} cb_excl, cb_shared;
>  };
>  
> +/**
> + * struct dma_buf_attach_ops - importer operations for an attachment
> + * @invalidate: [optional] invalidate all mappings made using this attachment.
> + *
> + * Attachment operations implemented by the importer.
> + */
> +struct dma_buf_attach_ops {
> +	/**
> +	 * @invalidate:
> +	 *
> +	 * If the invalidate callback is provided the framework can avoid
> +	 * pinning the backing store while mappings exists.
> +	 *
> +	 * This callback is called with the lock of the reservation object
> +	 * associated with the dma_buf held and the mapping function must be
> +	 * called with this lock held as well. This makes sure that no mapping
> +	 * is created concurrently with an ongoing invalidation.
> +	 *
> +	 * After the callback all existing mappings are still valid until all
> +	 * fences in the dma_bufs reservation object are signaled. After getting an
> +	 * invalidation callback all mappings should be destroyed by the importer using
> +	 * the normal dma_buf_unmap_attachment() function as soon as possible.
> +	 *
> +	 * New mappings can be created immediately, but can't be used before the
> +	 * exclusive fence in the dma_bufs reservation object is signaled.
> +	 */
> +	void (*invalidate)(struct dma_buf_attachment *attach);
> +};
> +
>  /**
>   * struct dma_buf_attachment - holds device-buffer attachment data
>   * @dmabuf: buffer for this attachment.
> @@ -338,6 +393,8 @@ struct dma_buf {
>   * @sgt: cached mapping.
>   * @dir: direction of cached mapping.
>   * @priv: exporter specific attachment data.
> + * @importer_ops: importer operations for this attachment.
> + * @importer_priv: importer specific attachment data.
>   *
>   * This structure holds the attachment information between the dma_buf buffer
>   * and its user device(s). The list contains one attachment struct per device
> @@ -355,6 +412,9 @@ struct dma_buf_attachment {
>  	struct sg_table *sgt;
>  	enum dma_data_direction dir;
>  	void *priv;
> +
> +	const struct dma_buf_attach_ops *importer_ops;
> +	void *importer_priv;
>  };
>  
>  /**
> @@ -405,10 +465,42 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
>  	get_file(dmabuf->file);
>  }
>  
> +/**
> + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
> + * @dmabuf: the DMA-buf to check
> + *
> + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
> + * for each attachment. False if only a single static sg table should be used.
> + */
> +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
> +{
> +	return !!dmabuf->ops->pin;
> +}
> +
> +/**
> + * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic
> + * mappinsg
> + * @attach: the DMA-buf attachment to check
> + *
> + * Returns true if a DMA-buf importer wants to use dynamic sg table mappings and
> + * calls the map/unmap functions with the reservation object locked.
> + */
> +static inline bool
> +dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
> +{
> +	return attach->importer_ops && attach->importer_ops->invalidate;
> +}
> +
>  struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> -							struct device *dev);
> +					  struct device *dev);
> +struct dma_buf_attachment *
> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> +		       const struct dma_buf_attach_ops *importer_ops,
> +		       void *importer_priv);
>  void dma_buf_detach(struct dma_buf *dmabuf,
> -				struct dma_buf_attachment *dmabuf_attach);
> +		    struct dma_buf_attachment *attach);
> +int dma_buf_pin(struct dma_buf_attachment *attach);
> +void dma_buf_unpin(struct dma_buf_attachment *attach);
>  
>  struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
>  
> @@ -420,6 +512,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
>  					enum dma_data_direction);
>  void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
>  				enum dma_data_direction);
> +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
>  int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
>  			     enum dma_data_direction dir);
>  int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
> -- 
> 2.17.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
       [not found]   ` <20190621092022.GB12905-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2019-06-21  9:55     ` Christian König
  2019-06-21 10:32       ` Daniel Vetter
  0 siblings, 1 reply; 17+ messages in thread
From: Christian König @ 2019-06-21  9:55 UTC (permalink / raw)
  To: Daniel Vetter
  Cc: intel-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 21.06.19 um 11:20 schrieb Daniel Vetter:
> On Tue, Jun 18, 2019 at 01:54:50PM +0200, Christian König wrote:
>> On the exporter side we add optional explicit pinning callbacks. If those
>> callbacks are implemented the framework no longer caches sg tables and the
>> map/unmap callbacks are always called with the lock of the reservation object
>> held.
>>
>> On the importer side we add an optional invalidate callback. This callback is
>> used by the exporter to inform the importers that their mappings should be
>> destroyed as soon as possible.
>>
>> This allows the exporter to provide the mappings without the need to pin
>> the backing store.
>>
>> v2: don't try to invalidate mappings when the callback is NULL,
>>      lock the reservation obj while using the attachments,
>>      add helper to set the callback
>> v3: move flag for invalidation support into the DMA-buf,
>>      use new attach_info structure to set the callback
>> v4: use importer_priv field instead of mangling exporter priv.
>> v5: drop invalidation_supported flag
>> v6: squash together with pin/unpin changes
>> v7: pin/unpin takes an attachment now
>> v8: nuke dma_buf_attachment_(map|unmap)_locked,
>>      everything is now handled backward compatible
>> v9: always cache when export/importer don't agree on dynamic handling
>> v10: minimal style cleanup
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/dma-buf/dma-buf.c | 188 ++++++++++++++++++++++++++++++++++++--
>>   include/linux/dma-buf.h   | 109 ++++++++++++++++++++--
>>   2 files changed, 283 insertions(+), 14 deletions(-)
>>
>> [SNIP]
>> +	if (dma_buf_attachment_is_dynamic(attach)) {
>> +		reservation_object_assert_held(attach->dmabuf->resv);
>> +
>> +		/*
>> +		 * Mapping a DMA-buf can trigger its invalidation, prevent
>> +		 * sending this event to the caller by temporary removing
>> +		 * this attachment from the list.
>> +		 */
>> +		list_del(&attach->node);
> I'm still hung up about this, that still feels like leaking random ttm
> implementation details into the dma-buf interfaces. And it's asymmetric:
>
> - When acquiring a buffer mapping (whether p2p or system memory sg or
>    whatever) we always have to wait for pending fences before we can access
>    the buffer. At least for full dynamic dma-buf access.
>
> - Same is true when dropping a mapping: We could drop the mapping
>    immediately, but only actually release it when that fence has signalled.
>    Then this hack here wouldn't be necessary.
>
> It feels a bit like this is just an artifact of how ttm currently does bo
> moves with the shadow bo. There's other ways to fix that, you could just
> have a memory manager reservation of a given range or whatever and a
> release fence from when it's actually good to use.

No, that is for handling a completely different case :)

>
> Imo the below semantics would be much cleaner:
>
> - invalidate may add new fences
> - invalidate _must_ unmap its mappings
> - an unmap must wait for current fences before the mapping can be
>    released.
>
> Imo there's no reason why unmap is special, and the only thing where we
> don't use fences to gate access to resources/memory when it's in the
> process of getting moved around.

Well in general I want to avoid waiting for fences as much as possible. 
But the key point here is that this actually won't help with the problem 
I'm trying to solve.

> btw this is like the 2nd or 3rd time I'm typing this, haven't seen your
> thoughts on this yet.

Yeah, and I'm responding for the 3rd time now that you are 
misunderstanding why we need this here :)

Maybe I can make that clear with an example:

1. You got a sharing between device A (exporter) and B (importer) which 
uses P2P.

2. Now device C (importer) comes along and wants to use the DMA-buf 
object as well.

3. The handling now figures out that we can't do P2P between device A 
and device C (for whatever reason).

4. The map_attachment implementation in device driver A doesn't want to 
fail with -EBUSY and migrates the DMA-buf somewhere where both device A 
and device C can access it.

5. This migration will result in sending an invalidation event around. 
And here it doesn't make sense to send this invalidation event to device 
C, because we know that device C is actually causing this event and 
doesn't have a valid mapping.


One alternative would be to completely disallow buffer migration which 
can cause invalidation in the drivers map_attachment call. But with 
dynamic handling you definitely need to be able to migrate in the 
map_attachment call for swapping evicted things back into a place where 
they are accessible. So that would make it harder for drivers to get it 
right.


Another alternative (and that's what I implemented initially) is to make 
sure the driver calling map_attachment can handle invalidation events 
re-entering itself while doing so. But then you add another tricky thing 
for drivers to handle which could be done in the general code.


The reason I don't have that on unmap is that I think migrating things 
on unmap doesn't make sense. If you think otherwise it certainly does 
make sense to add that there as well.

So this is just the most defensive thing I was able to come up with, 
which leaves the least possibility for drivers to do something stupid.

Thanks,
Christian.

>
> Thanks, Daniel
>
>> +
>> +	} else if (dma_buf_is_dynamic(attach->dmabuf)) {
>> +		reservation_object_lock(attach->dmabuf->resv, NULL);
>> +		r = dma_buf_pin(attach);
>> +		if (r) {
>> +			reservation_object_unlock(attach->dmabuf->resv);
>> +			return ERR_PTR(r);
>> +		}
>> +	}
>> +
>>   	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
>>   	if (!sg_table)
>>   		sg_table = ERR_PTR(-ENOMEM);
>>   
>> +	if (dma_buf_attachment_is_dynamic(attach)) {
>> +		list_add(&attach->node, &attach->dmabuf->attachments);
>> +
>> +	} else if (dma_buf_is_dynamic(attach->dmabuf)) {
>> +		if (IS_ERR(sg_table))
>> +			dma_buf_unpin(attach);
>> +		reservation_object_unlock(attach->dmabuf->resv);
>> +	}
>> +
>>   	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
>>   		attach->sgt = sg_table;
>>   		attach->dir = direction;
>> @@ -802,10 +945,41 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>>   	if (attach->sgt == sg_table)
>>   		return;
>>   
>> +	if (dma_buf_attachment_is_dynamic(attach))
>> +		reservation_object_assert_held(attach->dmabuf->resv);
>> +	else if (dma_buf_is_dynamic(attach->dmabuf))
>> +		reservation_object_lock(attach->dmabuf->resv, NULL);
>> +
>>   	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
>> +
>> +	if (dma_buf_is_dynamic(attach->dmabuf) &&
>> +	    !dma_buf_attachment_is_dynamic(attach)) {
>> +		dma_buf_unpin(attach);
>> +		reservation_object_unlock(attach->dmabuf->resv);
>> +	}
>>   }
>>   EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>>   
>> +/**
>> + * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
>> + *
>> + * @dmabuf:	[in]	buffer which mappings should be invalidated
>> + *
>> + * Informs all attachmenst that they need to destroy and recreated all their
>> + * mappings.
>> + */
>> +void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
>> +{
>> +	struct dma_buf_attachment *attach;
>> +
>> +	reservation_object_assert_held(dmabuf->resv);
>> +
>> +	list_for_each_entry(attach, &dmabuf->attachments, node)
>> +		if (attach->importer_ops && attach->importer_ops->invalidate)
>> +			attach->importer_ops->invalidate(attach);
>> +}
>> +EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
>> +
>>   /**
>>    * DOC: cpu access
>>    *
>> @@ -1225,10 +1399,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
>>   		seq_puts(s, "\tAttached Devices:\n");
>>   		attach_count = 0;
>>   
>> +		reservation_object_lock(buf_obj->resv, NULL);
>>   		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
>>   			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
>>   			attach_count++;
>>   		}
>> +		reservation_object_unlock(buf_obj->resv);
>>   
>>   		seq_printf(s, "Total %d devices attached\n\n",
>>   				attach_count);
>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
>> index 01ad5b942a6f..f9c96bf56bc8 100644
>> --- a/include/linux/dma-buf.h
>> +++ b/include/linux/dma-buf.h
>> @@ -92,14 +92,40 @@ struct dma_buf_ops {
>>   	 */
>>   	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
>>   
>> +	/**
>> +	 * @pin:
>> +	 *
>> +	 * This is called by dma_buf_pin and lets the exporter know that the
>> +	 * DMA-buf can't be moved any more.
>> +	 *
>> +	 * This is called with the dmabuf->resv object locked.
>> +	 *
>> +	 * This callback is optional.
>> +	 *
>> +	 * Returns:
>> +	 *
>> +	 * 0 on success, negative error code on failure.
>> +	 */
>> +	int (*pin)(struct dma_buf_attachment *attach);
>> +
>> +	/**
>> +	 * @unpin:
>> +	 *
>> +	 * This is called by dma_buf_unpin and lets the exporter know that the
>> +	 * DMA-buf can be moved again.
>> +	 *
>> +	 * This is called with the dmabuf->resv object locked.
>> +	 *
>> +	 * This callback is optional.
>> +	 */
>> +	void (*unpin)(struct dma_buf_attachment *attach);
>> +
>>   	/**
>>   	 * @map_dma_buf:
>>   	 *
>>   	 * This is called by dma_buf_map_attachment() and is used to map a
>>   	 * shared &dma_buf into device address space, and it is mandatory. It
>> -	 * can only be called if @attach has been called successfully. This
>> -	 * essentially pins the DMA buffer into place, and it cannot be moved
>> -	 * any more
>> +	 * can only be called if @attach has been called successfully.
>>   	 *
>>   	 * This call may sleep, e.g. when the backing storage first needs to be
>>   	 * allocated, or moved to a location suitable for all currently attached
>> @@ -120,6 +146,9 @@ struct dma_buf_ops {
>>   	 * any other kind of sharing that the exporter might wish to make
>>   	 * available to buffer-users.
>>   	 *
>> +	 * This is always called with the dmabuf->resv object locked when
>> +	 * the pin/unpin callbacks are implemented.
>> +	 *
>>   	 * Returns:
>>   	 *
>>   	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
>> @@ -137,9 +166,6 @@ struct dma_buf_ops {
>>   	 *
>>   	 * This is called by dma_buf_unmap_attachment() and should unmap and
>>   	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
>> -	 * It should also unpin the backing storage if this is the last mapping
>> -	 * of the DMA buffer, it the exporter supports backing storage
>> -	 * migration.
>>   	 */
>>   	void (*unmap_dma_buf)(struct dma_buf_attachment *,
>>   			      struct sg_table *,
>> @@ -330,6 +356,35 @@ struct dma_buf {
>>   	} cb_excl, cb_shared;
>>   };
>>   
>> +/**
>> + * struct dma_buf_attach_ops - importer operations for an attachment
>> + * @invalidate: [optional] invalidate all mappings made using this attachment.
>> + *
>> + * Attachment operations implemented by the importer.
>> + */
>> +struct dma_buf_attach_ops {
>> +	/**
>> +	 * @invalidate:
>> +	 *
>> +	 * If the invalidate callback is provided the framework can avoid
>> +	 * pinning the backing store while mappings exists.
>> +	 *
>> +	 * This callback is called with the lock of the reservation object
>> +	 * associated with the dma_buf held and the mapping function must be
>> +	 * called with this lock held as well. This makes sure that no mapping
>> +	 * is created concurrently with an ongoing invalidation.
>> +	 *
>> +	 * After the callback all existing mappings are still valid until all
>> +	 * fences in the dma_bufs reservation object are signaled. After getting an
>> +	 * invalidation callback all mappings should be destroyed by the importer using
>> +	 * the normal dma_buf_unmap_attachment() function as soon as possible.
>> +	 *
>> +	 * New mappings can be created immediately, but can't be used before the
>> +	 * exclusive fence in the dma_bufs reservation object is signaled.
>> +	 */
>> +	void (*invalidate)(struct dma_buf_attachment *attach);
>> +};
>> +
>>   /**
>>    * struct dma_buf_attachment - holds device-buffer attachment data
>>    * @dmabuf: buffer for this attachment.
>> @@ -338,6 +393,8 @@ struct dma_buf {
>>    * @sgt: cached mapping.
>>    * @dir: direction of cached mapping.
>>    * @priv: exporter specific attachment data.
>> + * @importer_ops: importer operations for this attachment.
>> + * @importer_priv: importer specific attachment data.
>>    *
>>    * This structure holds the attachment information between the dma_buf buffer
>>    * and its user device(s). The list contains one attachment struct per device
>> @@ -355,6 +412,9 @@ struct dma_buf_attachment {
>>   	struct sg_table *sgt;
>>   	enum dma_data_direction dir;
>>   	void *priv;
>> +
>> +	const struct dma_buf_attach_ops *importer_ops;
>> +	void *importer_priv;
>>   };
>>   
>>   /**
>> @@ -405,10 +465,42 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
>>   	get_file(dmabuf->file);
>>   }
>>   
>> +/**
>> + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
>> + * @dmabuf: the DMA-buf to check
>> + *
>> + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
>> + * for each attachment. False if only a single static sg table should be used.
>> + */
>> +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
>> +{
>> +	return !!dmabuf->ops->pin;
>> +}
>> +
>> +/**
>> + * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic
>> + * mappinsg
>> + * @attach: the DMA-buf attachment to check
>> + *
>> + * Returns true if a DMA-buf importer wants to use dynamic sg table mappings and
>> + * calls the map/unmap functions with the reservation object locked.
>> + */
>> +static inline bool
>> +dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
>> +{
>> +	return attach->importer_ops && attach->importer_ops->invalidate;
>> +}
>> +
>>   struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>> -							struct device *dev);
>> +					  struct device *dev);
>> +struct dma_buf_attachment *
>> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
>> +		       const struct dma_buf_attach_ops *importer_ops,
>> +		       void *importer_priv);
>>   void dma_buf_detach(struct dma_buf *dmabuf,
>> -				struct dma_buf_attachment *dmabuf_attach);
>> +		    struct dma_buf_attachment *attach);
>> +int dma_buf_pin(struct dma_buf_attachment *attach);
>> +void dma_buf_unpin(struct dma_buf_attachment *attach);
>>   
>>   struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
>>   
>> @@ -420,6 +512,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
>>   					enum dma_data_direction);
>>   void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
>>   				enum dma_data_direction);
>> +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
>>   int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
>>   			     enum dma_data_direction dir);
>>   int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
>> -- 
>> 2.17.1
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
  2019-06-21  9:55     ` Christian König
@ 2019-06-21 10:32       ` Daniel Vetter
  2019-06-21 12:06         ` Christian König
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2019-06-21 10:32 UTC (permalink / raw)
  To: Christian König; +Cc: intel-gfx, amd-gfx list, dri-devel

On Fri, Jun 21, 2019 at 11:55 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 21.06.19 um 11:20 schrieb Daniel Vetter:
> > On Tue, Jun 18, 2019 at 01:54:50PM +0200, Christian König wrote:
> >> On the exporter side we add optional explicit pinning callbacks. If those
> >> callbacks are implemented the framework no longer caches sg tables and the
> >> map/unmap callbacks are always called with the lock of the reservation object
> >> held.
> >>
> >> On the importer side we add an optional invalidate callback. This callback is
> >> used by the exporter to inform the importers that their mappings should be
> >> destroyed as soon as possible.
> >>
> >> This allows the exporter to provide the mappings without the need to pin
> >> the backing store.
> >>
> >> v2: don't try to invalidate mappings when the callback is NULL,
> >>      lock the reservation obj while using the attachments,
> >>      add helper to set the callback
> >> v3: move flag for invalidation support into the DMA-buf,
> >>      use new attach_info structure to set the callback
> >> v4: use importer_priv field instead of mangling exporter priv.
> >> v5: drop invalidation_supported flag
> >> v6: squash together with pin/unpin changes
> >> v7: pin/unpin takes an attachment now
> >> v8: nuke dma_buf_attachment_(map|unmap)_locked,
> >>      everything is now handled backward compatible
> >> v9: always cache when export/importer don't agree on dynamic handling
> >> v10: minimal style cleanup
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/dma-buf/dma-buf.c | 188 ++++++++++++++++++++++++++++++++++++--
> >>   include/linux/dma-buf.h   | 109 ++++++++++++++++++++--
> >>   2 files changed, 283 insertions(+), 14 deletions(-)
> >>
> >> [SNIP]
> >> +    if (dma_buf_attachment_is_dynamic(attach)) {
> >> +            reservation_object_assert_held(attach->dmabuf->resv);
> >> +
> >> +            /*
> >> +             * Mapping a DMA-buf can trigger its invalidation, prevent
> >> +             * sending this event to the caller by temporary removing
> >> +             * this attachment from the list.
> >> +             */
> >> +            list_del(&attach->node);
> > I'm still hung up about this, that still feels like leaking random ttm
> > implementation details into the dma-buf interfaces. And it's asymmetric:
> >
> > - When acquiring a buffer mapping (whether p2p or system memory sg or
> >    whatever) we always have to wait for pending fences before we can access
> >    the buffer. At least for full dynamic dma-buf access.
> >
> > - Same is true when dropping a mapping: We could drop the mapping
> >    immediately, but only actually release it when that fence has signalled.
> >    Then this hack here wouldn't be necessary.
> >
> > It feels a bit like this is just an artifact of how ttm currently does bo
> > moves with the shadow bo. There's other ways to fix that, you could just
> > have a memory manager reservation of a given range or whatever and a
> > release fence from when it's actually good to use.
>
> No, that is for handling a completely different case :)
>
> >
> > Imo the below semantics would be much cleaner:
> >
> > - invalidate may add new fences
> > - invalidate _must_ unmap its mappings
> > - an unmap must wait for current fences before the mapping can be
> >    released.
> >
> > Imo there's no reason why unmap is special, and the only thing where we
> > don't use fences to gate access to resources/memory when it's in the
> > process of getting moved around.
>
> Well in general I want to avoid waiting for fences as much as possible.
> But the key point here is that this actually won't help with the problem
> I'm trying to solve.

The point of using fences is not to wait on them. I mean if you have
the shadow ttm bo on the lru you also don't wait for that fence to
retire before you insert the shadow bo onto the lru. You don't even
wait when you try to use that memory again, you just pipeline more
stuff on top.

In the end it will be the exact same amount of fences and waiting in
both solutions. One just leaks less implementationt details (at least
in my opinion) across the dma-buf border.

> > btw this is like the 2nd or 3rd time I'm typing this, haven't seen your
> > thoughts on this yet.
>
> Yeah, and I'm responding for the 3rd time now that you are
> misunderstanding why we need this here :)
>
> Maybe I can make that clear with an example:
>
> 1. You got a sharing between device A (exporter) and B (importer) which
> uses P2P.
>
> 2. Now device C (importer) comes along and wants to use the DMA-buf
> object as well.
>
> 3. The handling now figures out that we can't do P2P between device A
> and device C (for whatever reason).
>
> 4. The map_attachment implementation in device driver A doesn't want to
> fail with -EBUSY and migrates the DMA-buf somewhere where both device A
> and device C can access it.
>
> 5. This migration will result in sending an invalidation event around.
> And here it doesn't make sense to send this invalidation event to device
> C, because we know that device C is actually causing this event and
> doesn't have a valid mapping.

Hm I thought the last time around there was a different scenario, with
just one importer:

- importer has a mapping, gets an ->invalidate call.
- importer arranges for the mappings/usage to get torn down, maybe
updating fences, all from ->invalidate. But the mapping itself wont
disappear.
- exporter moves buffer to new places (for whatever reasons it felt
that was the thing to do).
- importer does another execbuf, the exporter needs to move the buffer
back. Again it calls ->invalidate, but on a mapping it already has
called ->invalidate on, and to prevent that silliness we take the
importer temporary off the list.

Your scenario here is new, and iirc my suggestion back then was to
count the number of pending mappings so you don't go around calling
->invalidate on mappings that don't exist.

But even if you fix your scenario here there's still the issue that we
can receive invalidates on a mapping we've already torn down and which
is on the process of disappearing. That's kinda the part I don't think
is great semantics.

> One alternative would be to completely disallow buffer migration which
> can cause invalidation in the drivers map_attachment call. But with
> dynamic handling you definitely need to be able to migrate in the
> map_attachment call for swapping evicted things back into a place where
> they are accessible. So that would make it harder for drivers to get it
> right.

Nah, that defeats the point. Also, it's not the problem of getting an
->invalidate for something that's already been ->invalidated. It could
also be some 3rd party which causes another buffer move, and then
again importers would get an ->invalidate for something that they've
cleaned up already.

> Another alternative (and that's what I implemented initially) is to make
> sure the driver calling map_attachment can handle invalidation events
> re-entering itself while doing so. But then you add another tricky thing
> for drivers to handle which could be done in the general code.

Nah that sounds even worse :-)

> The reason I don't have that on unmap is that I think migrating things
> on unmap doesn't make sense. If you think otherwise it certainly does
> make sense to add that there as well.

The problem isn't the recursion, but the book-keeping. There's imo two cases:

- your scenario, where you call ->invalidate on an attachment which
doesn't have a mapping. I'll call that very lazy accounting, feels
like a bug :-) It's also very easy to fix by keeping track who
actually has a mapping, and then you fix it everywhere, not just for
the specific case of a recursion into the same caller.

- calling invalidate multiple times. That's my scenario (or your older
one), where you call invalidate again on something that's already
invalidated. Just keeping track of who actually has a mapping wont fix
that, imo the proper fix is to to pipeline the unmapping using fences.
But I guess there's other fixes too possible.

Either way none of this is about recursion, I think the recursive case
is simply the one where you've hit this already. Drivers will have to
handle all these additional ->invalidates no matter what with your
current proposal. After all the point here is that the exporter can
move the buffers around whenever it feels like, for whatever reasons.

For solutions I think there's roughly three:
- importers need to deal. You don't like that, I agree
- exporters need to deal, probably not much better, but I think
stricter contract is better in itself at least.
- dma-buf.c keeps better track of mappings and which have been
invalidated already

We could also combine the last two with some helpers, e.g. if your
exporter really expects importers to delay the unmap until it's no
longer in use, then we could do a small helper which puts all these
unmaps onto a list with a worker. But I think you want to integrate
that into your exporters lru management directly.

> So this is just the most defensive thing I was able to come up with,
> which leaves the least possibility for drivers to do something stupid.

Maybe we're still talking past each another, but I feel like the big
issues are all still there. Problem identified, yes, solved, no.

Thanks, Daniel

>
> Thanks,
> Christian.
>
> >
> > Thanks, Daniel
> >
> >> +
> >> +    } else if (dma_buf_is_dynamic(attach->dmabuf)) {
> >> +            reservation_object_lock(attach->dmabuf->resv, NULL);
> >> +            r = dma_buf_pin(attach);
> >> +            if (r) {
> >> +                    reservation_object_unlock(attach->dmabuf->resv);
> >> +                    return ERR_PTR(r);
> >> +            }
> >> +    }
> >> +
> >>      sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> >>      if (!sg_table)
> >>              sg_table = ERR_PTR(-ENOMEM);
> >>
> >> +    if (dma_buf_attachment_is_dynamic(attach)) {
> >> +            list_add(&attach->node, &attach->dmabuf->attachments);
> >> +
> >> +    } else if (dma_buf_is_dynamic(attach->dmabuf)) {
> >> +            if (IS_ERR(sg_table))
> >> +                    dma_buf_unpin(attach);
> >> +            reservation_object_unlock(attach->dmabuf->resv);
> >> +    }
> >> +
> >>      if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
> >>              attach->sgt = sg_table;
> >>              attach->dir = direction;
> >> @@ -802,10 +945,41 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
> >>      if (attach->sgt == sg_table)
> >>              return;
> >>
> >> +    if (dma_buf_attachment_is_dynamic(attach))
> >> +            reservation_object_assert_held(attach->dmabuf->resv);
> >> +    else if (dma_buf_is_dynamic(attach->dmabuf))
> >> +            reservation_object_lock(attach->dmabuf->resv, NULL);
> >> +
> >>      attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
> >> +
> >> +    if (dma_buf_is_dynamic(attach->dmabuf) &&
> >> +        !dma_buf_attachment_is_dynamic(attach)) {
> >> +            dma_buf_unpin(attach);
> >> +            reservation_object_unlock(attach->dmabuf->resv);
> >> +    }
> >>   }
> >>   EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
> >>
> >> +/**
> >> + * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
> >> + *
> >> + * @dmabuf: [in]    buffer which mappings should be invalidated
> >> + *
> >> + * Informs all attachmenst that they need to destroy and recreated all their
> >> + * mappings.
> >> + */
> >> +void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
> >> +{
> >> +    struct dma_buf_attachment *attach;
> >> +
> >> +    reservation_object_assert_held(dmabuf->resv);
> >> +
> >> +    list_for_each_entry(attach, &dmabuf->attachments, node)
> >> +            if (attach->importer_ops && attach->importer_ops->invalidate)
> >> +                    attach->importer_ops->invalidate(attach);
> >> +}
> >> +EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
> >> +
> >>   /**
> >>    * DOC: cpu access
> >>    *
> >> @@ -1225,10 +1399,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
> >>              seq_puts(s, "\tAttached Devices:\n");
> >>              attach_count = 0;
> >>
> >> +            reservation_object_lock(buf_obj->resv, NULL);
> >>              list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
> >>                      seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
> >>                      attach_count++;
> >>              }
> >> +            reservation_object_unlock(buf_obj->resv);
> >>
> >>              seq_printf(s, "Total %d devices attached\n\n",
> >>                              attach_count);
> >> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> >> index 01ad5b942a6f..f9c96bf56bc8 100644
> >> --- a/include/linux/dma-buf.h
> >> +++ b/include/linux/dma-buf.h
> >> @@ -92,14 +92,40 @@ struct dma_buf_ops {
> >>       */
> >>      void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
> >>
> >> +    /**
> >> +     * @pin:
> >> +     *
> >> +     * This is called by dma_buf_pin and lets the exporter know that the
> >> +     * DMA-buf can't be moved any more.
> >> +     *
> >> +     * This is called with the dmabuf->resv object locked.
> >> +     *
> >> +     * This callback is optional.
> >> +     *
> >> +     * Returns:
> >> +     *
> >> +     * 0 on success, negative error code on failure.
> >> +     */
> >> +    int (*pin)(struct dma_buf_attachment *attach);
> >> +
> >> +    /**
> >> +     * @unpin:
> >> +     *
> >> +     * This is called by dma_buf_unpin and lets the exporter know that the
> >> +     * DMA-buf can be moved again.
> >> +     *
> >> +     * This is called with the dmabuf->resv object locked.
> >> +     *
> >> +     * This callback is optional.
> >> +     */
> >> +    void (*unpin)(struct dma_buf_attachment *attach);
> >> +
> >>      /**
> >>       * @map_dma_buf:
> >>       *
> >>       * This is called by dma_buf_map_attachment() and is used to map a
> >>       * shared &dma_buf into device address space, and it is mandatory. It
> >> -     * can only be called if @attach has been called successfully. This
> >> -     * essentially pins the DMA buffer into place, and it cannot be moved
> >> -     * any more
> >> +     * can only be called if @attach has been called successfully.
> >>       *
> >>       * This call may sleep, e.g. when the backing storage first needs to be
> >>       * allocated, or moved to a location suitable for all currently attached
> >> @@ -120,6 +146,9 @@ struct dma_buf_ops {
> >>       * any other kind of sharing that the exporter might wish to make
> >>       * available to buffer-users.
> >>       *
> >> +     * This is always called with the dmabuf->resv object locked when
> >> +     * the pin/unpin callbacks are implemented.
> >> +     *
> >>       * Returns:
> >>       *
> >>       * A &sg_table scatter list of or the backing storage of the DMA buffer,
> >> @@ -137,9 +166,6 @@ struct dma_buf_ops {
> >>       *
> >>       * This is called by dma_buf_unmap_attachment() and should unmap and
> >>       * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
> >> -     * It should also unpin the backing storage if this is the last mapping
> >> -     * of the DMA buffer, it the exporter supports backing storage
> >> -     * migration.
> >>       */
> >>      void (*unmap_dma_buf)(struct dma_buf_attachment *,
> >>                            struct sg_table *,
> >> @@ -330,6 +356,35 @@ struct dma_buf {
> >>      } cb_excl, cb_shared;
> >>   };
> >>
> >> +/**
> >> + * struct dma_buf_attach_ops - importer operations for an attachment
> >> + * @invalidate: [optional] invalidate all mappings made using this attachment.
> >> + *
> >> + * Attachment operations implemented by the importer.
> >> + */
> >> +struct dma_buf_attach_ops {
> >> +    /**
> >> +     * @invalidate:
> >> +     *
> >> +     * If the invalidate callback is provided the framework can avoid
> >> +     * pinning the backing store while mappings exists.
> >> +     *
> >> +     * This callback is called with the lock of the reservation object
> >> +     * associated with the dma_buf held and the mapping function must be
> >> +     * called with this lock held as well. This makes sure that no mapping
> >> +     * is created concurrently with an ongoing invalidation.
> >> +     *
> >> +     * After the callback all existing mappings are still valid until all
> >> +     * fences in the dma_bufs reservation object are signaled. After getting an
> >> +     * invalidation callback all mappings should be destroyed by the importer using
> >> +     * the normal dma_buf_unmap_attachment() function as soon as possible.
> >> +     *
> >> +     * New mappings can be created immediately, but can't be used before the
> >> +     * exclusive fence in the dma_bufs reservation object is signaled.
> >> +     */
> >> +    void (*invalidate)(struct dma_buf_attachment *attach);
> >> +};
> >> +
> >>   /**
> >>    * struct dma_buf_attachment - holds device-buffer attachment data
> >>    * @dmabuf: buffer for this attachment.
> >> @@ -338,6 +393,8 @@ struct dma_buf {
> >>    * @sgt: cached mapping.
> >>    * @dir: direction of cached mapping.
> >>    * @priv: exporter specific attachment data.
> >> + * @importer_ops: importer operations for this attachment.
> >> + * @importer_priv: importer specific attachment data.
> >>    *
> >>    * This structure holds the attachment information between the dma_buf buffer
> >>    * and its user device(s). The list contains one attachment struct per device
> >> @@ -355,6 +412,9 @@ struct dma_buf_attachment {
> >>      struct sg_table *sgt;
> >>      enum dma_data_direction dir;
> >>      void *priv;
> >> +
> >> +    const struct dma_buf_attach_ops *importer_ops;
> >> +    void *importer_priv;
> >>   };
> >>
> >>   /**
> >> @@ -405,10 +465,42 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
> >>      get_file(dmabuf->file);
> >>   }
> >>
> >> +/**
> >> + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
> >> + * @dmabuf: the DMA-buf to check
> >> + *
> >> + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
> >> + * for each attachment. False if only a single static sg table should be used.
> >> + */
> >> +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
> >> +{
> >> +    return !!dmabuf->ops->pin;
> >> +}
> >> +
> >> +/**
> >> + * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic
> >> + * mappinsg
> >> + * @attach: the DMA-buf attachment to check
> >> + *
> >> + * Returns true if a DMA-buf importer wants to use dynamic sg table mappings and
> >> + * calls the map/unmap functions with the reservation object locked.
> >> + */
> >> +static inline bool
> >> +dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
> >> +{
> >> +    return attach->importer_ops && attach->importer_ops->invalidate;
> >> +}
> >> +
> >>   struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> >> -                                                    struct device *dev);
> >> +                                      struct device *dev);
> >> +struct dma_buf_attachment *
> >> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> >> +                   const struct dma_buf_attach_ops *importer_ops,
> >> +                   void *importer_priv);
> >>   void dma_buf_detach(struct dma_buf *dmabuf,
> >> -                            struct dma_buf_attachment *dmabuf_attach);
> >> +                struct dma_buf_attachment *attach);
> >> +int dma_buf_pin(struct dma_buf_attachment *attach);
> >> +void dma_buf_unpin(struct dma_buf_attachment *attach);
> >>
> >>   struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
> >>
> >> @@ -420,6 +512,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
> >>                                      enum dma_data_direction);
> >>   void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
> >>                              enum dma_data_direction);
> >> +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
> >>   int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
> >>                           enum dma_data_direction dir);
> >>   int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
> >> --
> >> 2.17.1
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
  2019-06-21 10:32       ` Daniel Vetter
@ 2019-06-21 12:06         ` Christian König
       [not found]           ` <78db8951-2e62-2a9d-3d87-3b458fbba880-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Christian König @ 2019-06-21 12:06 UTC (permalink / raw)
  To: Daniel Vetter, Christian König; +Cc: intel-gfx, dri-devel, amd-gfx list

Am 21.06.19 um 12:32 schrieb Daniel Vetter:
> On Fri, Jun 21, 2019 at 11:55 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> Am 21.06.19 um 11:20 schrieb Daniel Vetter:
>>> On Tue, Jun 18, 2019 at 01:54:50PM +0200, Christian König wrote:
>>> [SNIP]
>>> Imo the below semantics would be much cleaner:
>>>
>>> - invalidate may add new fences
>>> - invalidate _must_ unmap its mappings
>>> - an unmap must wait for current fences before the mapping can be
>>>     released.
>>>
>>> Imo there's no reason why unmap is special, and the only thing where we
>>> don't use fences to gate access to resources/memory when it's in the
>>> process of getting moved around.
>> Well in general I want to avoid waiting for fences as much as possible.
>> But the key point here is that this actually won't help with the problem
>> I'm trying to solve.
> The point of using fences is not to wait on them. I mean if you have
> the shadow ttm bo on the lru you also don't wait for that fence to
> retire before you insert the shadow bo onto the lru. You don't even
> wait when you try to use that memory again, you just pipeline more
> stuff on top.

Correct.

Ok, if I understand it correctly your suggestion is to move the 
responsibility to delay destruction of mappings until they are no longer 
used from the importer to the exporter based on the fences of the 
reservation object.

I seriously don't think that this is a good idea because you need to 
move the tracking who is using which mapping from the importer to the 
exporter as well. So duplicating quite a bunch of housekeeping.

On the other hand that we have this house keeping in the importer 
because we get it for free from TTM. But I can't think of a way other 
memory management backends would do this without keeping the sg table 
around either.

> In the end it will be the exact same amount of fences and waiting in
> both solutions. One just leaks less implementationt details (at least
> in my opinion) across the dma-buf border.

I agree that leaking implementation details over the DMA-buf border is a 
bad idea.

But I can assure you that this has absolutely nothing todo with the 
ghost object handling of TTM. ghost objects doesn't even receive an 
invalidation, they are just a possible implementation of the delayed 
destruction of sg tables.

>>> btw this is like the 2nd or 3rd time I'm typing this, haven't seen your
>>> thoughts on this yet.
>> Yeah, and I'm responding for the 3rd time now that you are
>> misunderstanding why we need this here :)
>>
>> Maybe I can make that clear with an example:
>>
>> 1. You got a sharing between device A (exporter) and B (importer) which
>> uses P2P.
>>
>> 2. Now device C (importer) comes along and wants to use the DMA-buf
>> object as well.
>>
>> 3. The handling now figures out that we can't do P2P between device A
>> and device C (for whatever reason).
>>
>> 4. The map_attachment implementation in device driver A doesn't want to
>> fail with -EBUSY and migrates the DMA-buf somewhere where both device A
>> and device C can access it.
>>
>> 5. This migration will result in sending an invalidation event around.
>> And here it doesn't make sense to send this invalidation event to device
>> C, because we know that device C is actually causing this event and
>> doesn't have a valid mapping.
> Hm I thought the last time around there was a different scenario, with
> just one importer:
>
> - importer has a mapping, gets an ->invalidate call.
> - importer arranges for the mappings/usage to get torn down, maybe
> updating fences, all from ->invalidate. But the mapping itself wont
> disappear.
> - exporter moves buffer to new places (for whatever reasons it felt
> that was the thing to do).
> - importer does another execbuf, the exporter needs to move the buffer
> back. Again it calls ->invalidate, but on a mapping it already has
> called ->invalidate on, and to prevent that silliness we take the
> importer temporary off the list.

Mhm, strange I don't remember giving this explanation. It also doesn't 
make to much sense, but see below.

> Your scenario here is new, and iirc my suggestion back then was to
> count the number of pending mappings so you don't go around calling
> ->invalidate on mappings that don't exist.

Well the key point is we don't call invalidate on mappings, but we call 
invalidate on attachments.

When the invalidate on an attachment is received all the importer should 
at least start to tear down all mappings.

> But even if you fix your scenario here there's still the issue that we
> can receive invalidates on a mapping we've already torn down and which
> is on the process of disappearing. That's kinda the part I don't think
> is great semantics.

Yeah, that is a rather valid point.

Currently it is perfectly valid to receive an invalidation when you 
don't even have any mappings at all.

But this is intentional, because otherwise I would need to move the 
housekeeping which mappings are currently made from the importer to the 
exporter.

And as explained above that would essentially mean double housekeeping.

> [SNIP]
>> The reason I don't have that on unmap is that I think migrating things
>> on unmap doesn't make sense. If you think otherwise it certainly does
>> make sense to add that there as well.
> The problem isn't the recursion, but the book-keeping. There's imo two cases:

Completely agree, yes.

> - your scenario, where you call ->invalidate on an attachment which
> doesn't have a mapping. I'll call that very lazy accounting, feels
> like a bug :-) It's also very easy to fix by keeping track who
> actually has a mapping, and then you fix it everywhere, not just for
> the specific case of a recursion into the same caller.

Yeah, exactly. Unfortunately it's not so easy to handle as just a counter.

When somebody unmaps a mapping you need to know if that is already 
invalidated or not. And this requires tracking of each mapping.

> - calling invalidate multiple times. That's my scenario (or your older
> one), where you call invalidate again on something that's already
> invalidated. Just keeping track of who actually has a mapping wont fix
> that, imo the proper fix is to to pipeline the unmapping using fences.

Unmapping using fences is exactly what I implemented using the TTM ghost 
objects.

The question is really who should implements this? The exporter or the 
importer?

> But I guess there's other fixes too possible.
>
> Either way none of this is about recursion, I think the recursive case
> is simply the one where you've hit this already. Drivers will have to
> handle all these additional ->invalidates no matter what with your
> current proposal. After all the point here is that the exporter can
> move the buffers around whenever it feels like, for whatever reasons.

The recursion case is still perfectly valid. In the importer I need to 
ignore invalidations which are caused by creating a mapping.

Otherwise it is perfectly possible that we invalidate a mapping because 
of its creation which will result in creating a new one....

So even if you fix up your mapping case, you absolutely still need this 
to prevent recursion :)

> For solutions I think there's roughly three:
> - importers need to deal. You don't like that, I agree
> - exporters need to deal, probably not much better, but I think
> stricter contract is better in itself at least.
> - dma-buf.c keeps better track of mappings and which have been
> invalidated already
>
> We could also combine the last two with some helpers, e.g. if your
> exporter really expects importers to delay the unmap until it's no
> longer in use, then we could do a small helper which puts all these
> unmaps onto a list with a worker. But I think you want to integrate
> that into your exporters lru management directly.
>
>> So this is just the most defensive thing I was able to come up with,
>> which leaves the least possibility for drivers to do something stupid.
> Maybe we're still talking past each another, but I feel like the big
> issues are all still there. Problem identified, yes, solved, no.

Yeah, agreed your last mail sounded like we are absolutely on the same 
page on the problem now.

Christian.

>
> Thanks, Daniel
>
>> Thanks,
>> Christian.
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
       [not found]           ` <78db8951-2e62-2a9d-3d87-3b458fbba880-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-06-21 16:27             ` Daniel Vetter
       [not found]               ` <20190621162753.GI12905-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2019-06-21 16:27 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo
  Cc: dri-devel, intel-gfx, amd-gfx list, Daniel Vetter

On Fri, Jun 21, 2019 at 02:06:54PM +0200, Christian König wrote:
> Am 21.06.19 um 12:32 schrieb Daniel Vetter:
> > On Fri, Jun 21, 2019 at 11:55 AM Christian König
> > <ckoenig.leichtzumerken@gmail.com> wrote:
> > > Am 21.06.19 um 11:20 schrieb Daniel Vetter:
> > > > On Tue, Jun 18, 2019 at 01:54:50PM +0200, Christian König wrote:
> > > > [SNIP]
> > > > Imo the below semantics would be much cleaner:
> > > > 
> > > > - invalidate may add new fences
> > > > - invalidate _must_ unmap its mappings
> > > > - an unmap must wait for current fences before the mapping can be
> > > >     released.
> > > > 
> > > > Imo there's no reason why unmap is special, and the only thing where we
> > > > don't use fences to gate access to resources/memory when it's in the
> > > > process of getting moved around.
> > > Well in general I want to avoid waiting for fences as much as possible.
> > > But the key point here is that this actually won't help with the problem
> > > I'm trying to solve.
> > The point of using fences is not to wait on them. I mean if you have
> > the shadow ttm bo on the lru you also don't wait for that fence to
> > retire before you insert the shadow bo onto the lru. You don't even
> > wait when you try to use that memory again, you just pipeline more
> > stuff on top.
> 
> Correct.
> 
> Ok, if I understand it correctly your suggestion is to move the
> responsibility to delay destruction of mappings until they are no longer
> used from the importer to the exporter based on the fences of the
> reservation object.
> 
> I seriously don't think that this is a good idea because you need to move
> the tracking who is using which mapping from the importer to the exporter as
> well. So duplicating quite a bunch of housekeeping.
> 
> On the other hand that we have this house keeping in the importer because we
> get it for free from TTM. But I can't think of a way other memory management
> backends would do this without keeping the sg table around either.
> 
> > In the end it will be the exact same amount of fences and waiting in
> > both solutions. One just leaks less implementationt details (at least
> > in my opinion) across the dma-buf border.
> 
> I agree that leaking implementation details over the DMA-buf border is a bad
> idea.
> 
> But I can assure you that this has absolutely nothing todo with the ghost
> object handling of TTM. ghost objects doesn't even receive an invalidation,
> they are just a possible implementation of the delayed destruction of sg
> tables.
> 
> > > > btw this is like the 2nd or 3rd time I'm typing this, haven't seen your
> > > > thoughts on this yet.
> > > Yeah, and I'm responding for the 3rd time now that you are
> > > misunderstanding why we need this here :)
> > > 
> > > Maybe I can make that clear with an example:
> > > 
> > > 1. You got a sharing between device A (exporter) and B (importer) which
> > > uses P2P.
> > > 
> > > 2. Now device C (importer) comes along and wants to use the DMA-buf
> > > object as well.
> > > 
> > > 3. The handling now figures out that we can't do P2P between device A
> > > and device C (for whatever reason).
> > > 
> > > 4. The map_attachment implementation in device driver A doesn't want to
> > > fail with -EBUSY and migrates the DMA-buf somewhere where both device A
> > > and device C can access it.
> > > 
> > > 5. This migration will result in sending an invalidation event around.
> > > And here it doesn't make sense to send this invalidation event to device
> > > C, because we know that device C is actually causing this event and
> > > doesn't have a valid mapping.
> > Hm I thought the last time around there was a different scenario, with
> > just one importer:
> > 
> > - importer has a mapping, gets an ->invalidate call.
> > - importer arranges for the mappings/usage to get torn down, maybe
> > updating fences, all from ->invalidate. But the mapping itself wont
> > disappear.
> > - exporter moves buffer to new places (for whatever reasons it felt
> > that was the thing to do).
> > - importer does another execbuf, the exporter needs to move the buffer
> > back. Again it calls ->invalidate, but on a mapping it already has
> > called ->invalidate on, and to prevent that silliness we take the
> > importer temporary off the list.
> 
> Mhm, strange I don't remember giving this explanation. It also doesn't make
> to much sense, but see below.

Yeah maybe I mixed things up somewhere. I guess you started with the first
scenario, I replied with "why don't we track this in the exporter or
dma-buf.c then?" and then the thread died out or something.
> 
> > Your scenario here is new, and iirc my suggestion back then was to
> > count the number of pending mappings so you don't go around calling
> > ->invalidate on mappings that don't exist.
> 
> Well the key point is we don't call invalidate on mappings, but we call
> invalidate on attachments.
> 
> When the invalidate on an attachment is received all the importer should at
> least start to tear down all mappings.

Hm, so either we invalidate mappings instead (pretty big change for
dma-buf, but maybe worth it). Or importers need to deal with invalidate on
stuff they're don't even have mapped anywhere anyway.

> > But even if you fix your scenario here there's still the issue that we
> > can receive invalidates on a mapping we've already torn down and which
> > is on the process of disappearing. That's kinda the part I don't think
> > is great semantics.
> 
> Yeah, that is a rather valid point.
> 
> Currently it is perfectly valid to receive an invalidation when you don't
> even have any mappings at all.
> 
> But this is intentional, because otherwise I would need to move the
> housekeeping which mappings are currently made from the importer to the
> exporter.
> 
> And as explained above that would essentially mean double housekeeping.
> 
> > [SNIP]
> > > The reason I don't have that on unmap is that I think migrating things
> > > on unmap doesn't make sense. If you think otherwise it certainly does
> > > make sense to add that there as well.
> > The problem isn't the recursion, but the book-keeping. There's imo two cases:
> 
> Completely agree, yes.
> 
> > - your scenario, where you call ->invalidate on an attachment which
> > doesn't have a mapping. I'll call that very lazy accounting, feels
> > like a bug :-) It's also very easy to fix by keeping track who
> > actually has a mapping, and then you fix it everywhere, not just for
> > the specific case of a recursion into the same caller.
> 
> Yeah, exactly. Unfortunately it's not so easy to handle as just a counter.
> 
> When somebody unmaps a mapping you need to know if that is already
> invalidated or not. And this requires tracking of each mapping.

Yeah we'd need to track mappings. Well, someone has to track mappings, and
atm it seems to be a mix of both importer and exporter (and dma-buf.c).

> > - calling invalidate multiple times. That's my scenario (or your older
> > one), where you call invalidate again on something that's already
> > invalidated. Just keeping track of who actually has a mapping wont fix
> > that, imo the proper fix is to to pipeline the unmapping using fences.
> 
> Unmapping using fences is exactly what I implemented using the TTM ghost
> objects.
> 
> The question is really who should implements this? The exporter or the
> importer?
> 
> > But I guess there's other fixes too possible.
> > 
> > Either way none of this is about recursion, I think the recursive case
> > is simply the one where you've hit this already. Drivers will have to
> > handle all these additional ->invalidates no matter what with your
> > current proposal. After all the point here is that the exporter can
> > move the buffers around whenever it feels like, for whatever reasons.
> 
> The recursion case is still perfectly valid. In the importer I need to
> ignore invalidations which are caused by creating a mapping.
> 
> Otherwise it is perfectly possible that we invalidate a mapping because of
> its creation which will result in creating a new one....
> 
> So even if you fix up your mapping case, you absolutely still need this to
> prevent recursion :)

Hm, but if we stop tracking attachments and instead start tracking
mappings, then how is that possible:
1. importer has no mappings
2. importer creates attachment. still no mapping
3. importer calls dma_buf_attach_map_sg, still no mapping at this point
4. we call into the exporter implementation. still no mapping
5. exporter does whatever it does. still no mapping
6. exporter finishes. conceptually from the dma-buf pov, _this_ is where
the mapping starts to exist.
7. invalidates (hey the exporter maybe changed its mind!) are totally
fine, and will be serialized with ww_mutex.

So I kinda don understand why the exporter here is allowed to call
invalidate too early (the mapping doesn't exist yet from dma-buf pov), and
dma-buf needs to filter it out.

But anywhere else where we might call ->invalidate and there's not yet a
mapping (again purely from dma-buf pov), there the importer is supposed to
do the filter.

Someone needs to keep track of all this, and I want clear
responsibilities. What they are exactly is not that important.

> > For solutions I think there's roughly three:
> > - importers need to deal. You don't like that, I agree
> > - exporters need to deal, probably not much better, but I think
> > stricter contract is better in itself at least.
> > - dma-buf.c keeps better track of mappings and which have been
> > invalidated already
> > 
> > We could also combine the last two with some helpers, e.g. if your
> > exporter really expects importers to delay the unmap until it's no
> > longer in use, then we could do a small helper which puts all these
> > unmaps onto a list with a worker. But I think you want to integrate
> > that into your exporters lru management directly.
> > 
> > > So this is just the most defensive thing I was able to come up with,
> > > which leaves the least possibility for drivers to do something stupid.
> > Maybe we're still talking past each another, but I feel like the big
> > issues are all still there. Problem identified, yes, solved, no.
> 
> Yeah, agreed your last mail sounded like we are absolutely on the same page
> on the problem now.

So I pondered a few ideas while working out:

1) We drop this filtering. Importer needs to keep track of all its
mappings and filter out invalidates that aren't for that specific importer
(either because already invalidated, or not yet mapped, or whatever).
Feels fragile.

2) dma-buf.c keeps track of all the mappings. Will be quite invasive I
think, and will duplicate what either the importer or exporter are already
doing anyway. We might need a map_dynamic_sg so that the invalidate
happens on that, and the invalidate is one-shot (i.e. once unmapped you
can never use the same mapping again, you need to create a new one). Will
probably be quite a bit of code churn.

3) Like two, but instead of creating something new we change the semantics
of attachments. For dynamic dma-buf importers, create a new
dma_buf_attach_and_map_sg, which does both, together, atomically. Same for
unmap. For unmap I see a clever option here:  These attachements are
one-shot only, i.e. you attach_and_map_sg, then you get an invalidate,
after that the attachment is dead. Can't ever remap it. I think this would
neatly solve the "you've invalidated this already" issue. But it is a bit
more code churn I guess. Also, it breaks the idea of using the attachment
to indicate for which devices a buffer should be accessible, so it's not
accidentally pinned into a bad spot. We might still need that even for
dynamic importers.

4) The exporter keeps track of which attachments have a mapping, and
invalidates them individually. Not sure this would actually solve
anything because of the double-invalidate thing. If we go with this we'd
probably need to put the responsibility of delaying the unmap after the
corresponding fence has signalled also onto the exporter. So importer
would unmap from it's ->invalidate callback, but exporter needs to obey
the current fences attached to the resv_obj. Also feels a bit fragile
since it depends upon the exporter instead of shared code to do the right
thing.

I think there's probably a few more variants that might work, but those is
what I came up with.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
       [not found]               ` <20190621162753.GI12905-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2019-06-24 11:23                 ` Koenig, Christian
  2019-06-24 13:58                   ` Christian König
       [not found]                   ` <4d868c4c-cc00-bfa9-b6f5-969b76497cab-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 2 replies; 17+ messages in thread
From: Koenig, Christian @ 2019-06-24 11:23 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, dri-devel, amd-gfx list

Am 21.06.19 um 18:27 schrieb Daniel Vetter:
>>> Your scenario here is new, and iirc my suggestion back then was to
>>> count the number of pending mappings so you don't go around calling
>>> ->invalidate on mappings that don't exist.
>> Well the key point is we don't call invalidate on mappings, but we call
>> invalidate on attachments.
>>
>> When the invalidate on an attachment is received all the importer should at
>> least start to tear down all mappings.
> Hm, so either we invalidate mappings instead (pretty big change for
> dma-buf, but maybe worth it). Or importers need to deal with invalidate on
> stuff they're don't even have mapped anywhere anyway.

I actually I don't see a problem with this, but see below.

>> [SNIP]
>>> - your scenario, where you call ->invalidate on an attachment which
>>> doesn't have a mapping. I'll call that very lazy accounting, feels
>>> like a bug :-) It's also very easy to fix by keeping track who
>>> actually has a mapping, and then you fix it everywhere, not just for
>>> the specific case of a recursion into the same caller.
>> Yeah, exactly. Unfortunately it's not so easy to handle as just a counter.
>>
>> When somebody unmaps a mapping you need to know if that is already
>> invalidated or not. And this requires tracking of each mapping.
> Yeah we'd need to track mappings. Well, someone has to track mappings, and
> atm it seems to be a mix of both importer and exporter (and dma-buf.c).

Maybe I'm missing something, but I don't see the mix?

Only the importer is responsible to tracking mappings, e.g. the importer 
calls dma_buf_map_attachment() when it needs a mapping and calls 
dma_buf_unmap_attachment() when it is done with the mapping.

In between those two neither the exporter nor the DMA-buf cares about 
what mappings currently exist. And apart from debugging I actually don't 
see a reason why they should.

>> [SNIP]
>>> But I guess there's other fixes too possible.
>>>
>>> Either way none of this is about recursion, I think the recursive case
>>> is simply the one where you've hit this already. Drivers will have to
>>> handle all these additional ->invalidates no matter what with your
>>> current proposal. After all the point here is that the exporter can
>>> move the buffers around whenever it feels like, for whatever reasons.
>> The recursion case is still perfectly valid. In the importer I need to
>> ignore invalidations which are caused by creating a mapping.
>>
>> Otherwise it is perfectly possible that we invalidate a mapping because of
>> its creation which will result in creating a new one....
>>
>> So even if you fix up your mapping case, you absolutely still need this to
>> prevent recursion :)
> Hm, but if we stop tracking attachments and instead start tracking
> mappings, then how is that possible:

Yeah, but why should we do this? I don't see a benefit here. Importers 
just create/destroy mappings as they need them.

> 1. importer has no mappings
> 2. importer creates attachment. still no mapping
> 3. importer calls dma_buf_attach_map_sg, still no mapping at this point
> 4. we call into the exporter implementation. still no mapping
> 5. exporter does whatever it does. still no mapping
> 6. exporter finishes. conceptually from the dma-buf pov, _this_ is where
> the mapping starts to exist.
> 7. invalidates (hey the exporter maybe changed its mind!) are totally
> fine, and will be serialized with ww_mutex.
>
> So I kinda don understand why the exporter here is allowed to call
> invalidate too early (the mapping doesn't exist yet from dma-buf pov), and
> dma-buf needs to filter it out.
>
> But anywhere else where we might call ->invalidate and there's not yet a
> mapping (again purely from dma-buf pov), there the importer is supposed to
> do the filter.

Maybe this becomes clearer if we call the callback "moved" instead of 
"invalidated"?

I mean this is actually all about the exporter informing the importer 
that the DMA-buf in question is moving to a new location.

That we need to create a new mapping and destroy the old one at some 
point is an implementation detail on the importer.

I mean the long term idea is to use this for notification that a buffer 
is moving inside the same driver as well. And in this particular case I 
actually don't think that we would create mappings at all. Thinking more 
about it this is actually a really good argument to favor the 
implementation as it is currently.

> Someone needs to keep track of all this, and I want clear
> responsibilities. What they are exactly is not that important.

Clear responsibilities is indeed a good idea.

>>> We could also combine the last two with some helpers, e.g. if your
>>> exporter really expects importers to delay the unmap until it's no
>>> longer in use, then we could do a small helper which puts all these
>>> unmaps onto a list with a worker. But I think you want to integrate
>>> that into your exporters lru management directly.
>>>
>>>> So this is just the most defensive thing I was able to come up with,
>>>> which leaves the least possibility for drivers to do something stupid.
>>> Maybe we're still talking past each another, but I feel like the big
>>> issues are all still there. Problem identified, yes, solved, no.
>> Yeah, agreed your last mail sounded like we are absolutely on the same page
>> on the problem now.
> So I pondered a few ideas while working out:
>
> 1) We drop this filtering. Importer needs to keep track of all its
> mappings and filter out invalidates that aren't for that specific importer
> (either because already invalidated, or not yet mapped, or whatever).
> Feels fragile.
>
> 2) dma-buf.c keeps track of all the mappings. Will be quite invasive I
> think, and will duplicate what either the importer or exporter are already
> doing anyway. We might need a map_dynamic_sg so that the invalidate
> happens on that, and the invalidate is one-shot (i.e. once unmapped you
> can never use the same mapping again, you need to create a new one). Will
> probably be quite a bit of code churn.
>
> 3) Like two, but instead of creating something new we change the semantics
> of attachments. For dynamic dma-buf importers, create a new
> dma_buf_attach_and_map_sg, which does both, together, atomically. Same for
> unmap. For unmap I see a clever option here:  These attachements are
> one-shot only, i.e. you attach_and_map_sg, then you get an invalidate,
> after that the attachment is dead. Can't ever remap it. I think this would
> neatly solve the "you've invalidated this already" issue. But it is a bit
> more code churn I guess. Also, it breaks the idea of using the attachment
> to indicate for which devices a buffer should be accessible, so it's not
> accidentally pinned into a bad spot. We might still need that even for
> dynamic importers.
>
> 4) The exporter keeps track of which attachments have a mapping, and
> invalidates them individually. Not sure this would actually solve
> anything because of the double-invalidate thing. If we go with this we'd
> probably need to put the responsibility of delaying the unmap after the
> corresponding fence has signalled also onto the exporter. So importer
> would unmap from it's ->invalidate callback, but exporter needs to obey
> the current fences attached to the resv_obj. Also feels a bit fragile
> since it depends upon the exporter instead of shared code to do the right
> thing.

5) Just keep it as it is currently implemented, but we work on the 
documentation to make it clear how it is supposed to work.

I mean as far as I can see this is actually doing exactly what is expected.

> I think there's probably a few more variants that might work, but those is
> what I came up with.

I will take a moment and look into #1 as well, but I still don't see the 
need to change anything.

Christian.

> -Daniel

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
  2019-06-24 11:23                 ` Koenig, Christian
@ 2019-06-24 13:58                   ` Christian König
  2019-06-24 14:41                     ` Daniel Vetter
       [not found]                   ` <4d868c4c-cc00-bfa9-b6f5-969b76497cab-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 17+ messages in thread
From: Christian König @ 2019-06-24 13:58 UTC (permalink / raw)
  To: Koenig, Christian, Daniel Vetter; +Cc: intel-gfx, amd-gfx list, dri-devel

Am 24.06.19 um 13:23 schrieb Koenig, Christian:
> Am 21.06.19 um 18:27 schrieb Daniel Vetter:
>
>> So I pondered a few ideas while working out:
>>
>> 1) We drop this filtering. Importer needs to keep track of all its
>> mappings and filter out invalidates that aren't for that specific importer
>> (either because already invalidated, or not yet mapped, or whatever).
>> Feels fragile.
>>
>> [SNIP]
> [SNIP]
>
> I will take a moment and look into #1 as well, but I still don't see the
> need to change anything.

That turned out much cleaner than I thought it would be. Essentially it 
is only a single extra line of code in amdgpu.

Going to send that out as a patch set in a minute.

Christian.

>
> Christian.
>

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
       [not found]                   ` <4d868c4c-cc00-bfa9-b6f5-969b76497cab-5C7GfCeVMHo@public.gmane.org>
@ 2019-06-24 14:38                     ` Daniel Vetter
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2019-06-24 14:38 UTC (permalink / raw)
  To: Koenig, Christian; +Cc: dri-devel, intel-gfx, amd-gfx list, Daniel Vetter

On Mon, Jun 24, 2019 at 11:23:34AM +0000, Koenig, Christian wrote:
> Am 21.06.19 um 18:27 schrieb Daniel Vetter:
> >>> Your scenario here is new, and iirc my suggestion back then was to
> >>> count the number of pending mappings so you don't go around calling
> >>> ->invalidate on mappings that don't exist.
> >> Well the key point is we don't call invalidate on mappings, but we call
> >> invalidate on attachments.
> >>
> >> When the invalidate on an attachment is received all the importer should at
> >> least start to tear down all mappings.
> > Hm, so either we invalidate mappings instead (pretty big change for
> > dma-buf, but maybe worth it). Or importers need to deal with invalidate on
> > stuff they're don't even have mapped anywhere anyway.
> 
> I actually I don't see a problem with this, but see below.
> 
> >> [SNIP]
> >>> - your scenario, where you call ->invalidate on an attachment which
> >>> doesn't have a mapping. I'll call that very lazy accounting, feels
> >>> like a bug :-) It's also very easy to fix by keeping track who
> >>> actually has a mapping, and then you fix it everywhere, not just for
> >>> the specific case of a recursion into the same caller.
> >> Yeah, exactly. Unfortunately it's not so easy to handle as just a counter.
> >>
> >> When somebody unmaps a mapping you need to know if that is already
> >> invalidated or not. And this requires tracking of each mapping.
> > Yeah we'd need to track mappings. Well, someone has to track mappings, and
> > atm it seems to be a mix of both importer and exporter (and dma-buf.c).
> 
> Maybe I'm missing something, but I don't see the mix?
> 
> Only the importer is responsible to tracking mappings, e.g. the importer 
> calls dma_buf_map_attachment() when it needs a mapping and calls 
> dma_buf_unmap_attachment() when it is done with the mapping.
> 
> In between those two neither the exporter nor the DMA-buf cares about 
> what mappings currently exist. And apart from debugging I actually don't 
> see a reason why they should.

Atm importer has to track which mappings it actually has. Plus dma-buf.c
also tracks that, somehow, with your recursion-preventation code. So
that's the mix.

> >> [SNIP]
> >>> But I guess there's other fixes too possible.
> >>>
> >>> Either way none of this is about recursion, I think the recursive case
> >>> is simply the one where you've hit this already. Drivers will have to
> >>> handle all these additional ->invalidates no matter what with your
> >>> current proposal. After all the point here is that the exporter can
> >>> move the buffers around whenever it feels like, for whatever reasons.
> >> The recursion case is still perfectly valid. In the importer I need to
> >> ignore invalidations which are caused by creating a mapping.
> >>
> >> Otherwise it is perfectly possible that we invalidate a mapping because of
> >> its creation which will result in creating a new one....
> >>
> >> So even if you fix up your mapping case, you absolutely still need this to
> >> prevent recursion :)
> > Hm, but if we stop tracking attachments and instead start tracking
> > mappings, then how is that possible:
> 
> Yeah, but why should we do this? I don't see a benefit here. Importers 
> just create/destroy mappings as they need them.
> 
> > 1. importer has no mappings
> > 2. importer creates attachment. still no mapping
> > 3. importer calls dma_buf_attach_map_sg, still no mapping at this point
> > 4. we call into the exporter implementation. still no mapping
> > 5. exporter does whatever it does. still no mapping
> > 6. exporter finishes. conceptually from the dma-buf pov, _this_ is where
> > the mapping starts to exist.
> > 7. invalidates (hey the exporter maybe changed its mind!) are totally
> > fine, and will be serialized with ww_mutex.
> >
> > So I kinda don understand why the exporter here is allowed to call
> > invalidate too early (the mapping doesn't exist yet from dma-buf pov), and
> > dma-buf needs to filter it out.
> >
> > But anywhere else where we might call ->invalidate and there's not yet a
> > mapping (again purely from dma-buf pov), there the importer is supposed to
> > do the filter.
> 
> Maybe this becomes clearer if we call the callback "moved" instead of 
> "invalidated"?
> 
> I mean this is actually all about the exporter informing the importer 
> that the DMA-buf in question is moving to a new location.
> 
> That we need to create a new mapping and destroy the old one at some 
> point is an implementation detail on the importer.
> 
> I mean the long term idea is to use this for notification that a buffer 
> is moving inside the same driver as well. And in this particular case I 
> actually don't think that we would create mappings at all. Thinking more 
> about it this is actually a really good argument to favor the 
> implementation as it is currently.
> 
> > Someone needs to keep track of all this, and I want clear
> > responsibilities. What they are exactly is not that important.
> 
> Clear responsibilities is indeed a good idea.
> 
> >>> We could also combine the last two with some helpers, e.g. if your
> >>> exporter really expects importers to delay the unmap until it's no
> >>> longer in use, then we could do a small helper which puts all these
> >>> unmaps onto a list with a worker. But I think you want to integrate
> >>> that into your exporters lru management directly.
> >>>
> >>>> So this is just the most defensive thing I was able to come up with,
> >>>> which leaves the least possibility for drivers to do something stupid.
> >>> Maybe we're still talking past each another, but I feel like the big
> >>> issues are all still there. Problem identified, yes, solved, no.
> >> Yeah, agreed your last mail sounded like we are absolutely on the same page
> >> on the problem now.
> > So I pondered a few ideas while working out:
> >
> > 1) We drop this filtering. Importer needs to keep track of all its
> > mappings and filter out invalidates that aren't for that specific importer
> > (either because already invalidated, or not yet mapped, or whatever).
> > Feels fragile.
> >
> > 2) dma-buf.c keeps track of all the mappings. Will be quite invasive I
> > think, and will duplicate what either the importer or exporter are already
> > doing anyway. We might need a map_dynamic_sg so that the invalidate
> > happens on that, and the invalidate is one-shot (i.e. once unmapped you
> > can never use the same mapping again, you need to create a new one). Will
> > probably be quite a bit of code churn.
> >
> > 3) Like two, but instead of creating something new we change the semantics
> > of attachments. For dynamic dma-buf importers, create a new
> > dma_buf_attach_and_map_sg, which does both, together, atomically. Same for
> > unmap. For unmap I see a clever option here:  These attachements are
> > one-shot only, i.e. you attach_and_map_sg, then you get an invalidate,
> > after that the attachment is dead. Can't ever remap it. I think this would
> > neatly solve the "you've invalidated this already" issue. But it is a bit
> > more code churn I guess. Also, it breaks the idea of using the attachment
> > to indicate for which devices a buffer should be accessible, so it's not
> > accidentally pinned into a bad spot. We might still need that even for
> > dynamic importers.
> >
> > 4) The exporter keeps track of which attachments have a mapping, and
> > invalidates them individually. Not sure this would actually solve
> > anything because of the double-invalidate thing. If we go with this we'd
> > probably need to put the responsibility of delaying the unmap after the
> > corresponding fence has signalled also onto the exporter. So importer
> > would unmap from it's ->invalidate callback, but exporter needs to obey
> > the current fences attached to the resv_obj. Also feels a bit fragile
> > since it depends upon the exporter instead of shared code to do the right
> > thing.
> 
> 5) Just keep it as it is currently implemented, but we work on the 
> documentation to make it clear how it is supposed to work.
> 
> I mean as far as I can see this is actually doing exactly what is expected.

It's surprises me, so at least subverts my expectations.

> > I think there's probably a few more variants that might work, but those is
> > what I came up with.
> 
> I will take a moment and look into #1 as well, but I still don't see the 
> need to change anything.

One thing that's tricky is races here, i.e. what if an ->invalidate from
the exporter races with a map from the importer. The usual way to solve
these is by being really careful when you publish a new $thing, in this
case when we add the new mapping to whatever tracking list.

If we quench all these races with the ww_mutex in the reservation, then
the importer can trivially prevent the recursion you're preventing
explicitly here in dma-buf.c already. If we go with something more
fancy/lockless (atm definitely not the plan), then only the exporter can
stop the races through very careful ordering.

Anyway, I feel like we at least understand each another now about what the
question is ("who's responsible for tracking mappings"). Answer still a
bit in the open.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
  2019-06-24 13:58                   ` Christian König
@ 2019-06-24 14:41                     ` Daniel Vetter
  2019-06-25  7:20                       ` Koenig, Christian
  0 siblings, 1 reply; 17+ messages in thread
From: Daniel Vetter @ 2019-06-24 14:41 UTC (permalink / raw)
  To: christian.koenig; +Cc: intel-gfx, amd-gfx list, dri-devel

On Mon, Jun 24, 2019 at 03:58:00PM +0200, Christian König wrote:
> Am 24.06.19 um 13:23 schrieb Koenig, Christian:
> > Am 21.06.19 um 18:27 schrieb Daniel Vetter:
> > 
> > > So I pondered a few ideas while working out:
> > > 
> > > 1) We drop this filtering. Importer needs to keep track of all its
> > > mappings and filter out invalidates that aren't for that specific importer
> > > (either because already invalidated, or not yet mapped, or whatever).
> > > Feels fragile.
> > > 
> > > [SNIP]
> > [SNIP]
> > 
> > I will take a moment and look into #1 as well, but I still don't see the
> > need to change anything.
> 
> That turned out much cleaner than I thought it would be. Essentially it is
> only a single extra line of code in amdgpu.
> 
> Going to send that out as a patch set in a minute.

Yeah I mean kinda expected that because:
- everything's protected with ww_mutex anyway
- importer needs to keep track of mappings anways
So really all it needs to do is not be stupid and add the mapping it just
created to its tracking while still holding the ww_mutex. Similar on
invalidate/unmap.

With that all we need is a huge note in the docs that importers need to
keep track of their mappings and dtrt (with all the examples here spelled
out in the appropriate kerneldoc). And then I'm happy :-)

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
  2019-06-24 14:41                     ` Daniel Vetter
@ 2019-06-25  7:20                       ` Koenig, Christian
       [not found]                         ` <d2933e7a-5dd1-70af-cbb6-69755fcbbc5e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 17+ messages in thread
From: Koenig, Christian @ 2019-06-25  7:20 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx, amd-gfx list, dri-devel

Am 24.06.19 um 16:41 schrieb Daniel Vetter:
> On Mon, Jun 24, 2019 at 03:58:00PM +0200, Christian König wrote:
>> Am 24.06.19 um 13:23 schrieb Koenig, Christian:
>>> Am 21.06.19 um 18:27 schrieb Daniel Vetter:
>>>
>>>> So I pondered a few ideas while working out:
>>>>
>>>> 1) We drop this filtering. Importer needs to keep track of all its
>>>> mappings and filter out invalidates that aren't for that specific importer
>>>> (either because already invalidated, or not yet mapped, or whatever).
>>>> Feels fragile.
>>>>
>>>> [SNIP]
>>> [SNIP]
>>>
>>> I will take a moment and look into #1 as well, but I still don't see the
>>> need to change anything.
>> That turned out much cleaner than I thought it would be. Essentially it is
>> only a single extra line of code in amdgpu.
>>
>> Going to send that out as a patch set in a minute.
> Yeah I mean kinda expected that because:
> - everything's protected with ww_mutex anyway
> - importer needs to keep track of mappings anways
> So really all it needs to do is not be stupid and add the mapping it just
> created to its tracking while still holding the ww_mutex. Similar on
> invalidate/unmap.
>
> With that all we need is a huge note in the docs that importers need to
> keep track of their mappings and dtrt (with all the examples here spelled
> out in the appropriate kerneldoc). And then I'm happy :-)

Should I also rename the invalidate callback into move_notify? Would 
kind of make sense since we are not necessary directly invalidating 
mappings.

Christian.

>
> Cheers, Daniel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10
       [not found]                         ` <d2933e7a-5dd1-70af-cbb6-69755fcbbc5e-5C7GfCeVMHo@public.gmane.org>
@ 2019-06-25  7:40                           ` Daniel Vetter
  0 siblings, 0 replies; 17+ messages in thread
From: Daniel Vetter @ 2019-06-25  7:40 UTC (permalink / raw)
  To: Koenig, Christian; +Cc: intel-gfx, amd-gfx list, dri-devel, Daniel Vetter

On Tue, Jun 25, 2019 at 07:20:12AM +0000, Koenig, Christian wrote:
> Am 24.06.19 um 16:41 schrieb Daniel Vetter:
> > On Mon, Jun 24, 2019 at 03:58:00PM +0200, Christian König wrote:
> >> Am 24.06.19 um 13:23 schrieb Koenig, Christian:
> >>> Am 21.06.19 um 18:27 schrieb Daniel Vetter:
> >>>
> >>>> So I pondered a few ideas while working out:
> >>>>
> >>>> 1) We drop this filtering. Importer needs to keep track of all its
> >>>> mappings and filter out invalidates that aren't for that specific importer
> >>>> (either because already invalidated, or not yet mapped, or whatever).
> >>>> Feels fragile.
> >>>>
> >>>> [SNIP]
> >>> [SNIP]
> >>>
> >>> I will take a moment and look into #1 as well, but I still don't see the
> >>> need to change anything.
> >> That turned out much cleaner than I thought it would be. Essentially it is
> >> only a single extra line of code in amdgpu.
> >>
> >> Going to send that out as a patch set in a minute.
> > Yeah I mean kinda expected that because:
> > - everything's protected with ww_mutex anyway
> > - importer needs to keep track of mappings anways
> > So really all it needs to do is not be stupid and add the mapping it just
> > created to its tracking while still holding the ww_mutex. Similar on
> > invalidate/unmap.
> >
> > With that all we need is a huge note in the docs that importers need to
> > keep track of their mappings and dtrt (with all the examples here spelled
> > out in the appropriate kerneldoc). And then I'm happy :-)
> 
> Should I also rename the invalidate callback into move_notify? Would 
> kind of make sense since we are not necessary directly invalidating 
> mappings.

Hm maybe. I'll review the entire pile and probably reply with a docs patch
anyway. It would help in making it clear you get the callback even when
there's no mapping to invalidate for you ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2019-06-25  7:40 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-18 11:54 [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Christian König
2019-06-18 11:54 ` [PATCH 2/6] drm/ttm: remove the backing store if no placement is given Christian König
2019-06-18 11:54 ` [PATCH 3/6] drm/ttm: use the parent resv for ghost objects v2 Christian König
     [not found] ` <20190618115455.1253-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-06-18 11:54   ` [PATCH 4/6] drm/amdgpu: use allowed_domains for exported DMA-bufs Christian König
2019-06-18 11:54 ` [PATCH 5/6] drm/amdgpu: add independent DMA-buf export v6 Christian König
2019-06-18 11:54 ` [PATCH 6/6] drm/amdgpu: add independent DMA-buf import v6 Christian König
2019-06-21  9:20 ` [Intel-gfx] [PATCH 1/6] dma-buf: add dynamic DMA-buf handling v10 Daniel Vetter
     [not found]   ` <20190621092022.GB12905-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2019-06-21  9:55     ` Christian König
2019-06-21 10:32       ` Daniel Vetter
2019-06-21 12:06         ` Christian König
     [not found]           ` <78db8951-2e62-2a9d-3d87-3b458fbba880-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-06-21 16:27             ` [Intel-gfx] " Daniel Vetter
     [not found]               ` <20190621162753.GI12905-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2019-06-24 11:23                 ` Koenig, Christian
2019-06-24 13:58                   ` Christian König
2019-06-24 14:41                     ` Daniel Vetter
2019-06-25  7:20                       ` Koenig, Christian
     [not found]                         ` <d2933e7a-5dd1-70af-cbb6-69755fcbbc5e-5C7GfCeVMHo@public.gmane.org>
2019-06-25  7:40                           ` [Intel-gfx] " Daniel Vetter
     [not found]                   ` <4d868c4c-cc00-bfa9-b6f5-969b76497cab-5C7GfCeVMHo@public.gmane.org>
2019-06-24 14:38                     ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).