* [PATCH 1/9] dma-buf: start caching of sg_table objects
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

To allow a smooth transition from pinning buffer objects to dynamic
invalidation, we first start caching the sg_table for an attachment.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 24 ++++++++++++++++++++++++
 include/linux/dma-buf.h   | 14 ++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 7c858020d14b..775e13f54083 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -573,6 +573,20 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	list_add(&attach->node, &dmabuf->attachments);
 
 	mutex_unlock(&dmabuf->lock);
+
+	if (!dma_buf_is_dynamic(dmabuf)) {
+		struct sg_table *sgt;
+
+		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
+		if (!sgt)
+			sgt = ERR_PTR(-ENOMEM);
+		if (IS_ERR(sgt)) {
+			dma_buf_detach(dmabuf, attach);
+			return ERR_CAST(sgt);
+		}
+		attach->sgt = sgt;
+	}
+
 	return attach;
 
 err_attach:
@@ -595,6 +609,10 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 	if (WARN_ON(!dmabuf || !attach))
 		return;
 
+	if (attach->sgt)
+		dmabuf->ops->unmap_dma_buf(attach, attach->sgt,
+					   DMA_BIDIRECTIONAL);
+
 	mutex_lock(&dmabuf->lock);
 	list_del(&attach->node);
 	if (dmabuf->ops->detach)
@@ -630,6 +648,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf))
 		return ERR_PTR(-EINVAL);
 
+	if (attach->sgt)
+		return attach->sgt;
+
 	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
 	if (!sg_table)
 		sg_table = ERR_PTR(-ENOMEM);
@@ -657,6 +678,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
 		return;
 
+	if (attach->sgt == sg_table)
+		return;
+
 	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
 						direction);
 }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 58725f890b5b..52031fdc75bb 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -322,6 +322,7 @@ struct dma_buf_attachment {
 	struct dma_buf *dmabuf;
 	struct device *dev;
 	struct list_head node;
+	struct sg_table *sgt;
 	void *priv;
 };
 
@@ -373,6 +374,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
 	get_file(dmabuf->file);
 }
 
+/**
+ * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
+ * @dmabuf: the DMA-buf to check
+ *
+ * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
+ * for each attachment. False if only a single static sg table should be used.
+ */
+static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
+{
+	/* Always use a static mapping for now */
+	return false;
+}
+
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 							struct device *dev);
 void dma_buf_detach(struct dma_buf *dmabuf,
-- 
2.17.1
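
A minimal importer-side sketch (hypothetical foo_ code, not part of this
patch) of what the caching means for users of the interface: with a
non-dynamic exporter the map/unmap calls now just hand back the cached
table, and the real unmap happens at detach time.

#include <linux/dma-buf.h>

static int foo_use_dmabuf(struct device *dev, struct dma_buf *dmabuf)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	/* For non-dynamic exporters the sg_table is created and cached
	 * here, inside dma_buf_attach(). */
	attach = dma_buf_attach(dmabuf, dev);
	if (IS_ERR(attach))
		return PTR_ERR(attach);

	/* Returns the cached table without calling back into the exporter. */
	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(dmabuf, attach);
		return PTR_ERR(sgt);
	}

	/* ... program the device using sgt ... */

	/* No-op for the cached table; the exporter unmap runs in detach. */
	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(dmabuf, attach);
	return 0;
}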

* [PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

On the exporter side we add optional explicit pinning callbacks. If those
callbacks are implemented the framework no longer caches sg tables and the
map/unmap callbacks are always called with the lock of the reservation object
held.

On the importer side we add an optional invalidate callback. This callback is
used by the exporter to inform the importers that their mappings should be
destroyed as soon as possible.

This allows the exporter to provide the mappings without the need to pin
the backing store.

v2: don't try to invalidate mappings when the callback is NULL,
    lock the reservation obj while using the attachments,
    add helper to set the callback
v3: move flag for invalidation support into the DMA-buf,
    use new attach_info structure to set the callback
v4: use importer_priv field instead of mangling exporter priv.
v5: drop invalidation_supported flag
v6: squash together with pin/unpin changes
v7: pin/unpin takes an attachment now

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 190 ++++++++++++++++++++++++++++++++++++--
 include/linux/dma-buf.h   |  91 ++++++++++++++++--
 2 files changed, 261 insertions(+), 20 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 775e13f54083..464a4c38df6c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -530,10 +530,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
 EXPORT_SYMBOL_GPL(dma_buf_put);
 
 /**
- * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
  * calls attach() of dma_buf_ops to allow device-specific attach functionality
- * @dmabuf:	[in]	buffer to attach device to.
- * @dev:	[in]	device to be attached.
+ * @dmabuf:		[in]	buffer to attach device to.
+ * @dev:		[in]	device to be attached.
+ * @importer_ops:	[in]	importer operations for the attachment
+ * @importer_priv:	[in]	importer private pointer for the attachment
  *
  * Returns struct dma_buf_attachment pointer for this attachment. Attachments
  * must be cleaned up by calling dma_buf_detach().
@@ -547,8 +549,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
  * accessible to @dev, and cannot be moved to a more suitable place. This is
  * indicated with the error code -EBUSY.
  */
-struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-					  struct device *dev)
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       const struct dma_buf_attach_ops *importer_ops,
+		       void *importer_priv)
 {
 	struct dma_buf_attachment *attach;
 	int ret;
@@ -562,6 +566,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 
 	attach->dev = dev;
 	attach->dmabuf = dmabuf;
+	attach->importer_ops = importer_ops;
+	attach->importer_priv = importer_priv;
 
 	mutex_lock(&dmabuf->lock);
 
@@ -570,7 +576,9 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 		if (ret)
 			goto err_attach;
 	}
+	reservation_object_lock(dmabuf->resv, NULL);
 	list_add(&attach->node, &dmabuf->attachments);
+	reservation_object_unlock(dmabuf->resv);
 
 	mutex_unlock(&dmabuf->lock);
 
@@ -594,6 +602,21 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	mutex_unlock(&dmabuf->lock);
 	return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
+
+/**
+ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
+ * @dmabuf:	[in]	buffer to attach device to.
+ * @dev:	[in]	device to be attached.
+ *
+ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
+ * mapping.
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+					  struct device *dev)
+{
+	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(dma_buf_attach);
 
 /**
@@ -614,7 +637,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 					   DMA_BIDIRECTIONAL);
 
 	mutex_lock(&dmabuf->lock);
+	reservation_object_lock(dmabuf->resv, NULL);
 	list_del(&attach->node);
+	reservation_object_unlock(dmabuf->resv);
 	if (dmabuf->ops->detach)
 		dmabuf->ops->detach(dmabuf, attach);
 
@@ -623,6 +648,100 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 }
 EXPORT_SYMBOL_GPL(dma_buf_detach);
 
+/**
+ * dma_buf_pin - Lock down the DMA-buf
+ *
+ * @attach:	[in]	attachment which should be pinned
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
+ */
+int dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+	int ret = 0;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	if (dmabuf->ops->pin)
+		ret = dmabuf->ops->pin(attach);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dma_buf_pin);
+
+/**
+ * dma_buf_unpin - Remove lock from DMA-buf
+ *
+ * @attach:	[in]	attachment which should be unpinned
+ */
+void dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	if (dmabuf->ops->unpin)
+		dmabuf->ops->unpin(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unpin);
+
+/**
+ * dma_buf_map_attachment_locked - Returns the scatterlist table of the
+ * attachment mapped into _device_ address space, with the reservation lock
+ * held. Is a wrapper for map_dma_buf() of the dma_buf_ops.
+ * @attach:	[in]	attachment whose scatterlist is to be returned
+ * @direction:	[in]	direction of DMA transfer
+ *
+ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
+ * on error. May return -EINTR if it is interrupted by a signal.
+ *
+ * A mapping must be unmapped by using dma_buf_unmap_attachment_locked().
+ */
+struct sg_table *
+dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
+			      enum dma_data_direction direction)
+{
+	struct sg_table *sg_table;
+	int r;
+
+	might_sleep();
+	reservation_object_assert_held(attach->dmabuf->resv);
+
+	if (attach->sgt)
+		return attach->sgt;
+
+	if (attach->importer_ops && attach->importer_ops->invalidate) {
+		/*
+		 * Mapping a DMA-buf can trigger its invalidation; prevent
+		 * sending this event to the caller by temporarily removing
+		 * this attachment from the list.
+		 */
+		list_del(&attach->node);
+	} else {
+		r = dma_buf_pin(attach);
+		if (r)
+			return ERR_PTR(r);
+	}
+
+	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+	if (attach->importer_ops && attach->importer_ops->invalidate)
+		list_add(&attach->node, &attach->dmabuf->attachments);
+	if (!sg_table)
+		sg_table = ERR_PTR(-ENOMEM);
+
+	if (IS_ERR(sg_table) &&
+	    !(attach->importer_ops && attach->importer_ops->invalidate))
+		dma_buf_unpin(attach);
+
+	return sg_table;
+}
+EXPORT_SYMBOL_GPL(dma_buf_map_attachment_locked);
+
 /**
  * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
  * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
@@ -651,14 +770,42 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (attach->sgt)
 		return attach->sgt;
 
-	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
-	if (!sg_table)
-		sg_table = ERR_PTR(-ENOMEM);
+	reservation_object_lock(attach->dmabuf->resv, NULL);
+	sg_table = dma_buf_map_attachment_locked(attach, direction);
+	reservation_object_unlock(attach->dmabuf->resv);
 
 	return sg_table;
 }
 EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
 
+/**
+ * dma_buf_unmap_attachment_locked - unmaps the buffer with reservation lock
+ * held, should deallocate the associated scatterlist. Is a wrapper for
+ * unmap_dma_buf() of dma_buf_ops.
+ * @attach:	[in]	attachment to unmap buffer from
+ * @sg_table:	[in]	scatterlist info of the buffer to unmap
+ * @direction:  [in]    direction of DMA transfer
+ *
+ * This unmaps a DMA mapping for @attach obtained by
+ * dma_buf_map_attachment_locked().
+ */
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
+				     struct sg_table *sg_table,
+				     enum dma_data_direction direction)
+{
+	might_sleep();
+	reservation_object_assert_held(attach->dmabuf->resv);
+
+	if (attach->sgt == sg_table)
+		return;
+
+	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
+						direction);
+	if (!(attach->importer_ops && attach->importer_ops->invalidate))
+		dma_buf_unpin(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment_locked);
+
 /**
  * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
  * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
@@ -681,11 +828,32 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (attach->sgt == sg_table)
 		return;
 
-	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
-						direction);
+	reservation_object_lock(attach->dmabuf->resv, NULL);
+	dma_buf_unmap_attachment_locked(attach, sg_table, direction);
+	reservation_object_unlock(attach->dmabuf->resv);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
+/**
+ * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
+ *
+ * @dmabuf:	[in]	buffer which mappings should be invalidated
+ *
+ * Informs all attachments that they need to destroy and recreate all their
+ * mappings.
+ */
+void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
+{
+	struct dma_buf_attachment *attach;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	list_for_each_entry(attach, &dmabuf->attachments, node)
+		if (attach->importer_ops && attach->importer_ops->invalidate)
+			attach->importer_ops->invalidate(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
+
 /**
  * DOC: cpu access
  *
@@ -1098,10 +1266,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		seq_puts(s, "\tAttached Devices:\n");
 		attach_count = 0;
 
+		reservation_object_lock(buf_obj->resv, NULL);
 		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
 			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
 			attach_count++;
 		}
+		reservation_object_unlock(buf_obj->resv);
 
 		seq_printf(s, "Total %d devices attached\n\n",
 				attach_count);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 52031fdc75bb..f5681ca15d09 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -90,14 +90,40 @@ struct dma_buf_ops {
 	 */
 	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
 
+	/**
+	 * @pin:
+	 *
+	 * This is called by dma_buf_pin and lets the exporter know that the
+	 * DMA-buf can't be moved any more.
+	 *
+	 * This is called with the dmabuf->resv object locked.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success, negative error code on failure.
+	 */
+	int (*pin)(struct dma_buf_attachment *);
+
+	/**
+	 * @unpin:
+	 *
+	 * This is called by dma_buf_unpin and lets the exporter know that the
+	 * DMA-buf can be moved again.
+	 *
+	 * This is called with the dmabuf->resv object locked.
+	 *
+	 * This callback is optional.
+	 */
+	void (*unpin)(struct dma_buf_attachment *);
+
 	/**
 	 * @map_dma_buf:
 	 *
 	 * This is called by dma_buf_map_attachment() and is used to map a
 	 * shared &dma_buf into device address space, and it is mandatory. It
-	 * can only be called if @attach has been called successfully. This
-	 * essentially pins the DMA buffer into place, and it cannot be moved
-	 * any more
+	 * can only be called if @attach has been called successfully.
 	 *
 	 * This call may sleep, e.g. when the backing storage first needs to be
 	 * allocated, or moved to a location suitable for all currently attached
@@ -118,6 +144,9 @@ struct dma_buf_ops {
 	 * any other kind of sharing that the exporter might wish to make
 	 * available to buffer-users.
 	 *
+	 * This is always called with the dmabuf->resv object locked when
+	 * the pin/unpin callbacks are implemented.
+	 *
 	 * Returns:
 	 *
 	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
@@ -135,9 +164,6 @@ struct dma_buf_ops {
 	 *
 	 * This is called by dma_buf_unmap_attachment() and should unmap and
 	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
-	 * It should also unpin the backing storage if this is the last mapping
-	 * of the DMA buffer, it the exporter supports backing storage
-	 * migration.
 	 */
 	void (*unmap_dma_buf)(struct dma_buf_attachment *,
 			      struct sg_table *,
@@ -302,12 +328,43 @@ struct dma_buf {
 	} cb_excl, cb_shared;
 };
 
+/**
+ * struct dma_buf_attach_ops - importer operations for an attachment
+ * @invalidate: [optional] invalidate all mappings made using this attachment.
+ *
+ * Attachment operations implemented by the importer.
+ */
+struct dma_buf_attach_ops {
+	/**
+	 * @invalidate:
+	 *
+	 * If the invalidate callback is provided the framework can avoid
+	 * pinning the backing store while mappings exist.
+	 *
+	 * This callback is called with the lock of the reservation object
+	 * associated with the dma_buf held and the mapping function must be
+	 * called with this lock held as well. This makes sure that no mapping
+	 * is created concurrently with an ongoing invalidation.
+	 *
+	 * After the callback all existing mappings are still valid until all
+	 * fences in the dma_buf's reservation object are signaled. After
+	 * getting an invalidation callback all mappings should be destroyed by
+	 * the importer using the normal dma_buf_unmap_attachment() function as
+	 * soon as possible.
+	 *
+	 * New mappings can be created immediately, but can't be used before the
+	 * exclusive fence in the dma_buf's reservation object is signaled.
+	 */
+	void (*invalidate)(struct dma_buf_attachment *attach);
+};
+
 /**
  * struct dma_buf_attachment - holds device-buffer attachment data
  * @dmabuf: buffer for this attachment.
  * @dev: device attached to the buffer.
  * @node: list of dma_buf_attachment.
  * @priv: exporter specific attachment data.
+ * @importer_ops: importer operations for this attachment.
+ * @importer_priv: importer specific attachment data.
  *
  * This structure holds the attachment information between the dma_buf buffer
  * and its user device(s). The list contains one attachment struct per device
@@ -324,6 +381,9 @@ struct dma_buf_attachment {
 	struct list_head node;
 	struct sg_table *sgt;
 	void *priv;
+
+	const struct dma_buf_attach_ops *importer_ops;
+	void *importer_priv;
 };
 
 /**
@@ -383,14 +443,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
  */
 static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
 {
-	/* Always use a static mapping for now */
-	return false;
+	return !!dmabuf->ops->pin;
 }
 
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev);
+					  struct device *dev);
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       const struct dma_buf_attach_ops *importer_ops,
+		       void *importer_priv);
 void dma_buf_detach(struct dma_buf *dmabuf,
-				struct dma_buf_attachment *dmabuf_attach);
+		    struct dma_buf_attachment *attach);
+int dma_buf_pin(struct dma_buf_attachment *attach);
+void dma_buf_unpin(struct dma_buf_attachment *attach);
 
 struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
 
@@ -398,10 +463,16 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags);
 struct dma_buf *dma_buf_get(int fd);
 void dma_buf_put(struct dma_buf *dmabuf);
 
+struct sg_table *dma_buf_map_attachment_locked(struct dma_buf_attachment *,
+					       enum dma_data_direction);
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *,
+				     struct sg_table *,
+				     enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
2.17.1
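
To illustrate the new interface, a sketch of a dynamic importer
(hypothetical foo_ driver and struct foo_bo, not part of this patch):
register an invalidate callback at attach time, create mappings only
under the reservation lock, and tear them down again once the
invalidation fires.

static void foo_invalidate(struct dma_buf_attachment *attach)
{
	struct foo_bo *bo = attach->importer_priv;

	/* Called with attach->dmabuf->resv held. Existing mappings stay
	 * valid until the fences in the reservation object signal, so
	 * just mark the mapping stale and unmap it as soon as possible. */
	bo->sgt_stale = true;
}

static const struct dma_buf_attach_ops foo_attach_ops = {
	.invalidate = foo_invalidate,
};

static int foo_import(struct foo_bo *bo, struct dma_buf *dmabuf)
{
	bo->attach = dma_buf_dynamic_attach(dmabuf, bo->dev,
					    &foo_attach_ops, bo);
	if (IS_ERR(bo->attach))
		return PTR_ERR(bo->attach);
	return 0;
}

static struct sg_table *foo_map(struct foo_bo *bo)
{
	struct sg_table *sgt;

	/* Creating the mapping under the reservation lock guarantees it
	 * can't race with a concurrent invalidation. */
	reservation_object_lock(bo->attach->dmabuf->resv, NULL);
	sgt = dma_buf_map_attachment_locked(bo->attach, DMA_BIDIRECTIONAL);
	reservation_object_unlock(bo->attach->dmabuf->resv);
	return sgt;
}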

* [PATCH 3/9] drm: remove prime sg_table caching
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

That is now done by the DMA-buf helpers instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_prime.c | 76 ++++++++-----------------------------
 1 file changed, 16 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 231e3f6d5f41..90f5230cc0d5 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -86,11 +86,6 @@ struct drm_prime_member {
 	struct rb_node handle_rb;
 };
 
-struct drm_prime_attachment {
-	struct sg_table *sgt;
-	enum dma_data_direction dir;
-};
-
 static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
 				    struct dma_buf *dma_buf, uint32_t handle)
 {
@@ -188,25 +183,16 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
  * @dma_buf: buffer to attach device to
  * @attach: buffer attachment data
  *
- * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
- * device specific attachment. This can be used as the &dma_buf_ops.attach
- * callback.
+ * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
+ * used as the &dma_buf_ops.attach callback.
  *
  * Returns 0 on success, negative error code on failure.
  */
 int drm_gem_map_attach(struct dma_buf *dma_buf,
 		       struct dma_buf_attachment *attach)
 {
-	struct drm_prime_attachment *prime_attach;
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
-	if (!prime_attach)
-		return -ENOMEM;
-
-	prime_attach->dir = DMA_NONE;
-	attach->priv = prime_attach;
-
 	return drm_gem_pin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_attach);
@@ -222,26 +208,8 @@ EXPORT_SYMBOL(drm_gem_map_attach);
 void drm_gem_map_detach(struct dma_buf *dma_buf,
 			struct dma_buf_attachment *attach)
 {
-	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	if (prime_attach) {
-		struct sg_table *sgt = prime_attach->sgt;
-
-		if (sgt) {
-			if (prime_attach->dir != DMA_NONE)
-				dma_unmap_sg_attrs(attach->dev, sgt->sgl,
-						   sgt->nents,
-						   prime_attach->dir,
-						   DMA_ATTR_SKIP_CPU_SYNC);
-			sg_free_table(sgt);
-		}
-
-		kfree(sgt);
-		kfree(prime_attach);
-		attach->priv = NULL;
-	}
-
 	drm_gem_unpin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_detach);
@@ -286,39 +254,22 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
 struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
 				     enum dma_data_direction dir)
 {
-	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct sg_table *sgt;
 
-	if (WARN_ON(dir == DMA_NONE || !prime_attach))
+	if (WARN_ON(dir == DMA_NONE))
 		return ERR_PTR(-EINVAL);
 
-	/* return the cached mapping when possible */
-	if (prime_attach->dir == dir)
-		return prime_attach->sgt;
-
-	/*
-	 * two mappings with different directions for the same attachment are
-	 * not allowed
-	 */
-	if (WARN_ON(prime_attach->dir != DMA_NONE))
-		return ERR_PTR(-EBUSY);
-
 	if (obj->funcs)
 		sgt = obj->funcs->get_sg_table(obj);
 	else
 		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
 
-	if (!IS_ERR(sgt)) {
-		if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
-				      DMA_ATTR_SKIP_CPU_SYNC)) {
-			sg_free_table(sgt);
-			kfree(sgt);
-			sgt = ERR_PTR(-ENOMEM);
-		} else {
-			prime_attach->sgt = sgt;
-			prime_attach->dir = dir;
-		}
+	if (IS_ERR(sgt))
+		return sgt;
+
+	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			      DMA_ATTR_SKIP_CPU_SYNC)) {
+		sg_free_table(sgt);
+		kfree(sgt);
+		sgt = ERR_PTR(-ENOMEM);
 	}
 
 	return sgt;
@@ -331,14 +282,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
  * @sgt: scatterlist info of the buffer to unmap
  * @dir: direction of DMA transfer
  *
- * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
- * used as the &dma_buf_ops.unmap_dma_buf callback.
+ * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
  */
 void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
 			   struct sg_table *sgt,
 			   enum dma_data_direction dir)
 {
-	/* nothing to be done here */
+	if (!sgt)
+		return;
+
+	dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
 }
 EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
 
-- 
2.17.1
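
For reference, a sketch (hypothetical foo_ exporter, not from this
patch) of how a driver's dma_buf_ops now wires up the helpers; CPU
access callbacks are omitted. Every map_dma_buf call builds and
DMA-maps a fresh sg_table, every unmap_dma_buf frees it again, and the
caching for static exporters lives in the dma-buf core since patch 1.

static const struct dma_buf_ops foo_dmabuf_ops = {
	.attach = drm_gem_map_attach,		/* pins via drm_gem_pin() */
	.detach = drm_gem_map_detach,		/* unpins the GEM object */
	.map_dma_buf = drm_gem_map_dma_buf,
	.unmap_dma_buf = drm_gem_unmap_dma_buf,
	.release = drm_gem_dmabuf_release,
	.mmap = drm_gem_dmabuf_mmap,
};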

* [PATCH 4/9] drm/ttm: remove the backing store if no placement is given
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

Pipeline removal of the BO's backing store when no placement is given
during validation.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2845fceb2fbd..8502b3ed2d88 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1160,6 +1160,18 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 	uint32_t new_flags;
 
 	reservation_object_assert_held(bo->resv);
+
+	/*
+	 * Remove the backing store if no placement is given.
+	 */
+	if (!placement->num_placement && !placement->num_busy_placement) {
+		ret = ttm_bo_pipeline_gutting(bo);
+		if (ret)
+			return ret;
+
+		return ttm_tt_create(bo, false);
+	}
+
 	/*
 	 * Check whether we need to move buffer.
 	 */
-- 
2.17.1
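
A short sketch (hypothetical helper, not part of this patch) of how a
caller can now drop a BO's backing store through validation; this is
exactly what the DMA-buf invalidation callback in patch 9 relies on:

static int foo_drop_backing_store(struct ttm_buffer_object *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	struct ttm_placement placement = {};	/* no placement given */

	reservation_object_assert_held(bo->resv);

	/* Guts the BO and leaves it with a fresh, unpopulated TT. */
	return ttm_bo_validate(bo, &placement, &ctx);
}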

* [PATCH 5/9] drm/ttm: use the parent resv for ghost objects
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

This way we can even pipeline imported BO evictions.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 895d77d799e4..97f35c4bda35 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -486,7 +486,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 				      struct ttm_buffer_object **new_obj)
 {
 	struct ttm_transfer_obj *fbo;
-	int ret;
 
 	fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
 	if (!fbo)
@@ -517,10 +516,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	kref_init(&fbo->base.kref);
 	fbo->base.destroy = &ttm_transfered_destroy;
 	fbo->base.acc_size = 0;
-	fbo->base.resv = &fbo->base.ttm_resv;
-	reservation_object_init(fbo->base.resv);
-	ret = reservation_object_trylock(fbo->base.resv);
-	WARN_ON(!ret);
+	reservation_object_init(&fbo->base.ttm_resv);
 
 	*new_obj = &fbo->base;
 	return 0;
@@ -716,8 +712,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		reservation_object_add_excl_fence(ghost_obj->resv, fence);
-
 		/**
 		 * If we're not moving to fixed memory, the TTM object
 		 * needs to stay alive. Otherwhise hang it on the ghost
@@ -729,7 +723,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		else
 			bo->ttm = NULL;
 
-		ttm_bo_unreserve(ghost_obj);
 		ttm_bo_put(ghost_obj);
 	}
 
@@ -772,8 +765,6 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		reservation_object_add_excl_fence(ghost_obj->resv, fence);
-
 		/**
 		 * If we're not moving to fixed memory, the TTM object
 		 * needs to stay alive. Otherwhise hang it on the ghost
@@ -785,7 +776,6 @@ int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
 		else
 			bo->ttm = NULL;
 
-		ttm_bo_unreserve(ghost_obj);
 		ttm_bo_put(ghost_obj);
 
 	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
@@ -841,16 +831,10 @@ int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo)
 	if (ret)
 		return ret;
 
-	ret = reservation_object_copy_fences(ghost->resv, bo->resv);
-	/* Last resort, wait for the BO to be idle when we are OOM */
-	if (ret)
-		ttm_bo_wait(bo, false, false);
-
 	memset(&bo->mem, 0, sizeof(bo->mem));
 	bo->mem.mem_type = TTM_PL_SYSTEM;
 	bo->ttm = NULL;
 
-	ttm_bo_unreserve(ghost);
 	ttm_bo_put(ghost);
 
 	return 0;
-- 
2.17.1

* [PATCH 6/9] drm/amdgpu: use allowed_domains for exported DMA-bufs
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

Avoid ping/ponging the buffers when we stop pinning DMA-buf
exports by using the allowed domains for exported buffers.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d0e221c8d940..d6223e41e358 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -26,6 +26,7 @@
  */
 #include <linux/pagemap.h>
 #include <linux/sync_file.h>
+#include <linux/dma-buf.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_syncobj.h>
@@ -412,7 +413,9 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold) {
+	if (p->bytes_moved < p->bytes_moved_threshold &&
+	    (!bo->gem_base.dma_buf ||
+	    list_empty(&bo->gem_base.dma_buf->attachments))) {
 		if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
 		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
 			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
-- 
2.17.1

* [PATCH 7/9] drm/amdgpu: add independent DMA-buf export v3
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

The caching of SGT's is actually quite harmful and should probably be
removed altogether when all drivers are audited.

Start by providing a separate DMA-buf export implementation in amdgpu. This is
also a prerequisite for unpinned DMA-buf handling.

v2: fix unintended recursion, remove debugging leftovers
v3: split out from unpinned DMA-buf work
v4: rebase on top of new no_sgt_cache flag

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 204 ++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |   1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |   5 +
 4 files changed, 133 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 4711cf1b5bd2..68a071060793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -40,22 +40,6 @@
 #include <linux/dma-buf.h>
 #include <linux/dma-fence-array.h>
 
-/**
- * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
- * implementation
- * @obj: GEM buffer object (BO)
- *
- * Returns:
- * A scatter/gather table for the pinned pages of the BO's memory.
- */
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	int npages = bo->tbo.num_pages;
-
-	return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
-}
-
 /**
  * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
  * @obj: GEM BO
@@ -181,92 +165,158 @@ __reservation_object_make_exclusive(struct reservation_object *obj)
 }
 
 /**
- * amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
+ *
+ * @dmabuf: DMA-buf where we attach to
+ * @attach: attachment to add
+ *
+ * Add the attachment as user to the exported DMA-buf.
+ */
+static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
+				 struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	int r;
+
+	if (attach->dev->driver == adev->dev->driver)
+		return 0;
+
+	/*
+	 * We only create shared fences for internal use, but importers
+	 * of the dmabuf rely on exclusive fences for implicitly
+	 * tracking write hazards. As any of the current fences may
+	 * correspond to a write, we need to convert all existing
+	 * fences on the reservation object into a single exclusive
+	 * fence.
+	 */
+	r = __reservation_object_make_exclusive(bo->tbo.resv);
+	if (r)
+		return r;
+
+	bo->prime_shared_count++;
+	return 0;
+}
+
+/**
+ * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
+ *
+ * @dmabuf: DMA-buf where we remove the attachment from
+ * @attach: the attachment to remove
+ *
+ * Called when an attachment is removed from the DMA-buf.
+ */
+static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+
+	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
+		bo->prime_shared_count--;
+}
+
+/**
+ * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
+ *
+ * @attach: attachment to pin down
+ *
+ * Pin the BO which is backing the DMA-buf so that it can't move any more.
+ */
+static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+	/* pin buffer into GTT */
+	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+}
+
+/**
+ * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
+ *
+ * @attach: attachment to unpin
+ *
+ * Unpin a previously pinned BO to make it movable again.
+ */
+static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+	amdgpu_bo_unpin(bo);
+}
+
+/**
+ * amdgpu_dma_buf_map_dma_buf - &dma_buf_ops.map_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @dir: DMA direction
  *
  * Makes sure that the shared DMA buffer can be accessed by the target device.
  * For now, simply pins it to the GTT domain, where it should be accessible by
  * all DMA devices.
  *
  * Returns:
- * 0 on success or a negative error code on failure.
+ * sg_table filled with the DMA addresses to use or ERR_PTR with negative error
+ * code.
  */
-static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
-				     struct dma_buf_attachment *attach)
+static struct sg_table *
+amdgpu_dma_buf_map_dma_buf(struct dma_buf_attachment *attach,
+			   enum dma_data_direction dir)
 {
+	struct dma_buf *dma_buf = attach->dmabuf;
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct sg_table *sgt;
 	long r;
 
-	r = drm_gem_map_attach(dma_buf, attach);
-	if (r)
-		return r;
+	if (!bo->pin_count) {
+		/* move buffer into GTT */
+		struct ttm_operation_ctx ctx = { false, false };
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (unlikely(r != 0))
-		goto error_detach;
-
-
-	if (attach->dev->driver != adev->dev->driver) {
-		/*
-		 * We only create shared fences for internal use, but importers
-		 * of the dmabuf rely on exclusive fences for implicitly
-		 * tracking write hazards. As any of the current fences may
-		 * correspond to a write, we need to convert all existing
-		 * fences on the reservation object into a single exclusive
-		 * fence.
-		 */
-		r = __reservation_object_make_exclusive(bo->tbo.resv);
+		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		if (r)
-			goto error_unreserve;
+			return ERR_PTR(r);
 	}
 
-	/* pin buffer into GTT */
-	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
-	if (r)
-		goto error_unreserve;
+	sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages, bo->tbo.num_pages);
+	if (IS_ERR(sgt))
+		return sgt;
 
-	if (attach->dev->driver != adev->dev->driver)
-		bo->prime_shared_count++;
+	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			      DMA_ATTR_SKIP_CPU_SYNC))
+		goto error_free;
 
-error_unreserve:
-	amdgpu_bo_unreserve(bo);
+	return sgt;
 
-error_detach:
-	if (r)
-		drm_gem_map_detach(dma_buf, attach);
-	return r;
+error_free:
+	sg_free_table(sgt);
+	kfree(sgt);
+	return ERR_PTR(-ENOMEM);
 }
 
 /**
- * amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
- * @dma_buf: Shared DMA buffer
+ * amdgpu_dma_buf_unmap_dma_buf - &dma_buf_ops.unmap_dma_buf implementation
  * @attach: DMA-buf attachment
+ * @sgt: sg_table to unmap
+ * @dir: DMA direction
  *
  * This is called when a shared DMA buffer no longer needs to be accessible by
  * another device. For now, simply unpins the buffer from GTT.
  */
-static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
-				      struct dma_buf_attachment *attach)
+static void amdgpu_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					 struct sg_table *sgt,
+					 enum dma_data_direction dir)
 {
-	struct drm_gem_object *obj = dma_buf->priv;
-	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	int ret = 0;
-
-	ret = amdgpu_bo_reserve(bo, true);
-	if (unlikely(ret != 0))
-		goto error;
-
-	amdgpu_bo_unpin(bo);
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-		bo->prime_shared_count--;
-	amdgpu_bo_unreserve(bo);
+	if (!sgt)
+		return;
 
-error:
-	drm_gem_map_detach(dma_buf, attach);
+	dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+	sg_free_table(sgt);
+	kfree(sgt);
 }
 
 /**
@@ -324,10 +374,12 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 }
 
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
-	.attach = amdgpu_dma_buf_map_attach,
-	.detach = amdgpu_dma_buf_map_detach,
-	.map_dma_buf = drm_gem_map_dma_buf,
-	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	.attach = amdgpu_dma_buf_attach,
+	.detach = amdgpu_dma_buf_detach,
+	.pin = amdgpu_dma_buf_pin,
+	.unpin = amdgpu_dma_buf_unpin,
+	.map_dma_buf = amdgpu_dma_buf_map_dma_buf,
+	.unmap_dma_buf = amdgpu_dma_buf_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
 	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index c7056cbe8685..f1522292814c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,7 +25,6 @@
 
 #include <drm/drm_gem.h>
 
-struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
 struct drm_gem_object *
 amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 				 struct dma_buf_attachment *attach,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index a334d3b4944e..70c5cd2d5fb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1321,7 +1321,6 @@ static struct drm_driver kms_driver = {
 	.gem_prime_export = amdgpu_gem_prime_export,
 	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
-	.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
 	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = amdgpu_gem_prime_vmap,
 	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 93b2c5a48a71..d26e2f0b88d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -31,6 +31,7 @@
  */
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/dma-buf.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_cache.h>
@@ -1194,6 +1195,10 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 
 	amdgpu_bo_kunmap(abo);
 
+	if (abo->gem_base.dma_buf && !abo->gem_base.import_attach &&
+	    bo->mem.mem_type != TTM_PL_SYSTEM)
+		dma_buf_invalidate_mappings(abo->gem_base.dma_buf);
+
 	/* remember the eviction */
 	if (evict)
 		atomic64_inc(&adev->num_evictions);
-- 
2.17.1
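
The move_notify hunk above is the exporter half of the dynamic
protocol. As a generic sketch (hypothetical foo_ code, not from this
patch): whenever the backing store of an exported BO is about to move,
call dma_buf_invalidate_mappings() with the reservation lock held, so
that no importer can create a new mapping in parallel.

static void foo_bo_move_notify(struct foo_bo *bo)
{
	struct dma_buf *dmabuf = bo->gem_base.dma_buf;

	/* Only exported, not imported, BOs notify their importers. */
	if (!dmabuf || bo->gem_base.import_attach)
		return;

	reservation_object_assert_held(bo->resv);
	dma_buf_invalidate_mappings(dmabuf);
}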

* [PATCH 8/9] drm/amdgpu: add independent DMA-buf import v5
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

Instead of relying on the DRM functions, just implement our own import
functions. This prepares support for taking care of unpinned DMA-bufs.

v2: enable for all exporters, not just amdgpu, fix invalidation
    handling, lock reservation object while setting callback
v3: change to new dma_buf attach interface
v4: split out from unpinned DMA-buf work
v5: rebased and cleanup on new DMA-buf interface

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 38 +++++++++++++--------
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |  4 ---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c     |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 34 +++++++++++++++---
 4 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 68a071060793..0f4fe21d1b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -420,31 +420,28 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 }
 
 /**
- * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
- * implementation
+ * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
+ *
  * @dev: DRM device
- * @attach: DMA-buf attachment
- * @sg: Scatter/gather table
+ * @dma_buf: DMA-buf
  *
- * Imports shared DMA buffer memory exported by another device.
+ * Creates an empty SG BO for DMA-buf import.
  *
  * Returns:
- * A new GEM BO of the given DRM device, representing the memory
- * described by the given DMA-buf attachment and scatter/gather table.
+ * A new GEM BO of the given DRM device, representing the memory
+ * described by the given DMA-buf.
  */
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg)
+static struct drm_gem_object *
+amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 {
-	struct reservation_object *resv = attach->dmabuf->resv;
+	struct reservation_object *resv = dma_buf->resv;
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
 	int ret;
 
 	memset(&bp, 0, sizeof(bp));
-	bp.size = attach->dmabuf->size;
+	bp.size = dma_buf->size;
 	bp.byte_align = PAGE_SIZE;
 	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
 	bp.flags = 0;
@@ -455,11 +452,9 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 	if (ret)
 		goto error;
 
-	bo->tbo.sg = sg;
-	bo->tbo.ttm->sg = sg;
 	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
 		bo->prime_shared_count = 1;
 
 	ww_mutex_unlock(&resv->lock);
@@ -484,6 +479,7 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
 struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 					    struct dma_buf *dma_buf)
 {
+	struct dma_buf_attachment *attach;
 	struct drm_gem_object *obj;
 
 	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
@@ -498,5 +494,17 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 		}
 	}
 
-	return drm_gem_prime_import(dev, dma_buf);
+	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
+	if (IS_ERR(obj))
+		return obj;
+
+	attach = dma_buf_attach(dma_buf, dev->dev);
+	if (IS_ERR(attach)) {
+		drm_gem_object_put(obj);
+		return ERR_CAST(attach);
+	}
+
+	get_dma_buf(dma_buf);
+	obj->import_attach = attach;
+	return obj;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
index f1522292814c..2765413770b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
@@ -25,10 +25,6 @@
 
 #include <drm/drm_gem.h>
 
-struct drm_gem_object *
-amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
-				 struct dma_buf_attachment *attach,
-				 struct sg_table *sg);
 struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
 					struct drm_gem_object *gobj,
 					int flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 70c5cd2d5fb4..47ff2981d877 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1321,7 +1321,6 @@ static struct drm_driver kms_driver = {
 	.gem_prime_export = amdgpu_gem_prime_export,
 	.gem_prime_import = amdgpu_gem_prime_import,
 	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
-	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = amdgpu_gem_prime_vmap,
 	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
 	.gem_prime_mmap = amdgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c14198737dcd..afccca5b1f5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
 #include <linux/hmm.h>
+#include <linux/dma-buf.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -706,6 +707,7 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
  */
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
+	struct drm_gem_object	*gobj;
 	u64			offset;
 	uint64_t		userptr;
 	struct task_struct	*usertask;
@@ -1179,6 +1181,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
 		return NULL;
 	}
 	gtt->ttm.ttm.func = &amdgpu_backend_func;
+	gtt->gobj = &ttm_to_amdgpu_bo(bo)->gem_base;
 
 	/* allocate space for the uninitialized page entries */
 	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
@@ -1199,7 +1202,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
 	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
 	if (gtt && gtt->userptr) {
@@ -1212,7 +1214,20 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
 		return 0;
 	}
 
-	if (slave && ttm->sg) {
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
+		if (!ttm->sg) {
+			struct dma_buf_attachment *attach;
+			struct sg_table *sgt;
+
+			attach = gtt->gobj->import_attach;
+			sgt = dma_buf_map_attachment_locked(attach,
+							    DMA_BIDIRECTIONAL);
+			if (IS_ERR(sgt))
+				return PTR_ERR(sgt);
+
+			ttm->sg = sgt;
+		}
+
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address,
 						 ttm->num_pages);
@@ -1239,9 +1254,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
  */
 static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 {
-	struct amdgpu_device *adev;
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
+	struct amdgpu_device *adev;
 
 	if (gtt && gtt->userptr) {
 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
@@ -1250,7 +1264,17 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 		return;
 	}
 
-	if (slave)
+	if (ttm->sg && gtt->gobj->import_attach) {
+		struct dma_buf_attachment *attach;
+
+		attach = gtt->gobj->import_attach;
+		dma_buf_unmap_attachment_locked(attach, ttm->sg,
+						DMA_BIDIRECTIONAL);
+		ttm->sg = NULL;
+		return;
+	}
+
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
 		return;
 
 	adev = amdgpu_ttm_adev(ttm->bdev);
-- 
2.17.1
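
A sketch (hypothetical foo_ code, not from this patch) of the deferred
mapping idea introduced here: the import path only creates the
attachment, and the sg_table is created when the TTM backing is
actually populated, under the reservation lock, so it can be thrown
away and re-created after an invalidation.

static int foo_tt_populate(struct foo_tt *tt)
{
	struct dma_buf_attachment *attach = tt->gobj->import_attach;

	if (tt->sg)
		return 0;

	/* The caller holds the dma_buf's reservation lock. */
	tt->sg = dma_buf_map_attachment_locked(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(tt->sg)) {
		int r = PTR_ERR(tt->sg);

		tt->sg = NULL;
		return r;
	}
	return 0;
}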

* [PATCH 9/9] drm/amdgpu: add DMA-buf invalidation callback v2
From: Christian König @ 2019-05-07  8:13 UTC
  To: dri-devel

Allow for invalidation of imported DMA-bufs.

v2: add dma_buf_pin/dma_buf_unpin support

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 29 ++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c  |  6 +++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 0f4fe21d1b2b..63dedc93c3d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -465,6 +465,32 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 	return ERR_PTR(ret);
 }
 
+/**
+ * amdgpu_gem_prime_invalidate_mappings - &dma_buf_attach_ops.invalidate implementation
+ *
+ * @attach: the DMA-buf attachment
+ *
+ * Invalidate the DMA-buf attachment, making sure that we re-create the
+ * mapping before the next use.
+ */
+static void
+amdgpu_gem_prime_invalidate_mappings(struct dma_buf_attachment *attach)
+{
+	struct ttm_operation_ctx ctx = { false, false };
+	struct drm_gem_object *obj = attach->importer_priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct ttm_placement placement = {};
+	int r;
+
+	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+	if (r)
+		DRM_ERROR("Failed to invalidate DMA-buf import (%d)\n", r);
+}
+
+static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
+	.invalidate = amdgpu_gem_prime_invalidate_mappings
+};
+
 /**
  * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
  * @dev: DRM device
@@ -498,7 +524,8 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
 	if (IS_ERR(obj))
 		return obj;
 
-	attach = dma_buf_attach(dma_buf, dev->dev);
+	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
+					&amdgpu_dma_buf_attach_ops, obj);
 	if (IS_ERR(attach)) {
 		drm_gem_object_put(obj);
 		return ERR_CAST(attach);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d26e2f0b88d2..cf01da083c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -850,6 +850,9 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		return 0;
 	}
 
+	if (bo->gem_base.import_attach)
+		dma_buf_pin(bo->gem_base.import_attach);
+
 	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	/* force to pin into visible video ram */
 	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
@@ -933,6 +936,9 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 
 	amdgpu_bo_subtract_pin_size(bo);
 
+	if (bo->gem_base.import_attach)
+		dma_buf_unpin(bo->gem_base.import_attach);
+
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		bo->placements[i].lpfn = 0;
 		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-- 
2.17.1

* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
From: Daniel Vetter @ 2019-05-08  9:15 UTC
  To: Christian König; +Cc: dri-devel

On Tue, May 07, 2019 at 10:13:30AM +0200, Christian König wrote:
> To allow a smooth transition from pinning buffer objects to dynamic
> invalidation, we first start caching the sg_table for an attachment.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
>
> [...]
>
> +/**
> + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
> + * @dmabuf: the DMA-buf to check
> + *
> + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
> + * for each attachment. False if only a single static sg table should be used.
> + */
> +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
> +{
> +	/* Always use a static mapping for now */
> +	return false;

Hm I still expect that later on we'll want this to be decided by the
attachment: It's only dynamic if both the exporter and the importer
support dynamic dma-buf management, otherwise we need to pin.

But anyway, I feel like we need to go over the entire thing anyway once
more when p2p has landed, on this:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> +}
> +
>  struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  							struct device *dev);
>  void dma_buf_detach(struct dma_buf *dmabuf,
> -- 
> 2.17.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7
  2019-05-07  8:13 ` [PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7 Christian König
@ 2019-05-08  9:51   ` Daniel Vetter
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Vetter @ 2019-05-08  9:51 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Tue, May 07, 2019 at 10:13:31AM +0200, Christian König wrote:
> On the exporter side we add optional explicit pinning callbacks. If those
> callbacks are implemented the framework no longer caches sg tables and the
> map/unmap callbacks are always called with the lock of the reservation object
> held.
> 
> On the importer side we add an optional invalidate callback. This callback is
> used by the exporter to inform the importers that their mappings should be
> destroyed as soon as possible.
> 
> This allows the exporter to provide the mappings without the need to pin
> the backing store.
> 
> v2: don't try to invalidate mappings when the callback is NULL,
>     lock the reservation obj while using the attachments,
>     add helper to set the callback
> v3: move flag for invalidation support into the DMA-buf,
>     use new attach_info structure to set the callback
> v4: use importer_priv field instead of mangling exporter priv.
> v5: drop invalidation_supported flag
> v6: squash together with pin/unpin changes

Imo much better, I can keep the entire thing in my brain now (and the sgt
caching patch is simple enough that I can remember what it does).

> v7: pin/unpin takes an attachment now
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/dma-buf/dma-buf.c | 190 ++++++++++++++++++++++++++++++++++++--
>  include/linux/dma-buf.h   |  91 ++++++++++++++++--
>  2 files changed, 261 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 775e13f54083..464a4c38df6c 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -530,10 +530,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
>  EXPORT_SYMBOL_GPL(dma_buf_put);
>  
>  /**
> - * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
> + * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
>   * calls attach() of dma_buf_ops to allow device-specific attach functionality
> - * @dmabuf:	[in]	buffer to attach device to.
> - * @dev:	[in]	device to be attached.
> + * @dmabuf:		[in]	buffer to attach device to.
> + * @dev:		[in]	device to be attached.
> + * @importer_ops	[in]	importer operations for the attachment
> + * @importer_priv	[in]	importer private pointer for the attachment
>   *
>   * Returns struct dma_buf_attachment pointer for this attachment. Attachments
>   * must be cleaned up by calling dma_buf_detach().
> @@ -547,8 +549,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
>   * accessible to @dev, and cannot be moved to a more suitable place. This is
>   * indicated with the error code -EBUSY.
>   */
> -struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> -					  struct device *dev)
> +struct dma_buf_attachment *
> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> +		       const struct dma_buf_attach_ops *importer_ops,
> +		       void *importer_priv)
>  {
>  	struct dma_buf_attachment *attach;
>  	int ret;
> @@ -562,6 +566,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  
>  	attach->dev = dev;
>  	attach->dmabuf = dmabuf;
> +	attach->importer_ops = importer_ops;
> +	attach->importer_priv = importer_priv;
>  
>  	mutex_lock(&dmabuf->lock);
>  
> @@ -570,7 +576,9 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  		if (ret)
>  			goto err_attach;
>  	}
> +	reservation_object_lock(dmabuf->resv, NULL);
>  	list_add(&attach->node, &dmabuf->attachments);
> +	reservation_object_unlock(dmabuf->resv);
>  
>  	mutex_unlock(&dmabuf->lock);
>  
> @@ -594,6 +602,21 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  	mutex_unlock(&dmabuf->lock);
>  	return ERR_PTR(ret);
>  }
> +EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
> +
> +/**
> + * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
> + * @dmabuf:	[in]	buffer to attach device to.
> + * @dev:	[in]	device to be attached.
> + *
> + * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
> + * mapping.
> + */
> +struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> +					  struct device *dev)
> +{
> +	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
> +}
>  EXPORT_SYMBOL_GPL(dma_buf_attach);
>  
>  /**
> @@ -614,7 +637,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  					   DMA_BIDIRECTIONAL);
>  
>  	mutex_lock(&dmabuf->lock);
> +	reservation_object_lock(dmabuf->resv, NULL);
>  	list_del(&attach->node);
> +	reservation_object_unlock(dmabuf->resv);
>  	if (dmabuf->ops->detach)
>  		dmabuf->ops->detach(dmabuf, attach);
>  
> @@ -623,6 +648,100 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_detach);
>  
> +/**
> + * dma_buf_pin - Lock down the DMA-buf
> + *
> + * @attach:	[in]	attachment which should be pinned
> + *
> + * Returns:
> + * 0 on success, negative error code on failure.
> + */
> +int dma_buf_pin(struct dma_buf_attachment *attach)

ocd: This needs to be dma_buf_attachment_pin now, similar for unpin.

> +{
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +	int ret = 0;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	if (dmabuf->ops->pin)
> +		ret = dmabuf->ops->pin(attach);
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_pin);
> +
> +/**
> + * dma_buf_unpin - Remove lock from DMA-buf
> + *
> + * @attach:	[in]	attachment which should be unpinned
> + */
> +void dma_buf_unpin(struct dma_buf_attachment *attach)
> +{
> +	struct dma_buf *dmabuf = attach->dmabuf;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	if (dmabuf->ops->unpin)
> +		dmabuf->ops->unpin(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_unpin);
> +
> +/**
> + * dma_buf_map_attachment_locked - Maps the buffer into _device_ address space
> + * with the reservation lock held. Is a wrapper for map_dma_buf() of the
> + * dma_buf_ops.
> + * @attach:	[in]	attachment whose scatterlist is to be returned
> + * @direction:	[in]	direction of DMA transfer
> + *
> + * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
> + * on error. May return -EINTR if it is interrupted by a signal.
> + *
> + * A mapping must be unmapped by using dma_buf_unmap_attachment().
> + */
> +struct sg_table *
> +dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
> +			      enum dma_data_direction direction)
> +{
> +	struct sg_table *sg_table;
> +	int r;
> +
> +	might_sleep();
> +	reservation_object_assert_held(attach->dmabuf->resv);
> +
> +	if (attach->sgt)
> +		return attach->sgt;
> +
> +	if (attach->importer_ops && attach->importer_ops->invalidate) {
> +		/*
> +		 * Mapping a DMA-buf can trigger its invalidation, prevent
> +		 * sending this event to the caller by temporarily removing
> +		 * this attachment from the list.
> +		 */
> +		list_del(&attach->node);
I still feel like this is a very fragile hack here, and the only reason is
that we allow multiple mappings of the same attachment to coexist. From my
understanding:

1. import maps
2. exporter wants to move the buffer, calls invalidate
3. importer makes sure all processing has stopped in its ->invalidate
hook, which might involve adding more fences to the reservation object
4. exporter does the buffer copy to the new place, involving dma engines
and obeying all the fences in the reservation object
5. importer needs the buffer again, calls map, gets a new location
[ importer now has 2 mappings in-flight, hooray this is fun, especially if
userspace manages your gpu va space and so both need to be mapped in the
same place.]
6. eventually importer retires the old work and finally calls unmap on the
old mapping

This feels like a mess of an api. I'm kinda inclined to require that the
->invalidate hook must call attachment_unmap, and require that exporters
delay the unmapping until all fences are done (they need to do that
anyway, and the exporter needs to handle ghost objects anyway so can also
hang a few more sg tables off of those I think). An even cleaner interface would be a
dynamic_unmap where you explicitly pass a fence that must be completed
before the attachment is gone.

Irrespective of that api bikeshed I think there's a bigger issue of using
the attachment list for invalidation:
1. importer attaches
2. exporter decides to move, calls invalidate
3. importer gets an ->invalidate for an attachment they haven't even
mapped yet.
-> (probably) boom somewhere, like the recursion you're preventing above
results in tears.

If we go with my suggestion above, which results in there being at most 1
mapping per dynamic dma-buf attachment (plus ghost mappings waiting for a
dma_fence, but they don't count), then all we need is an
attachment->is_mapped flag to filter these out, set in dynamic_map and
cleared in dynamic_unmap. I think that's also much cleaner than your
temporary list_del trickery.

With your overlapping mapping api the tracking gets a bit more tricky, you
need to remember which one is the current mapping so that you don't clear
the ->is_mapped flag when the importer unmaps a ghost mapping.
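
Something like this on the importer side is what I have in mind (the
importer_bo bits are all made up, just to illustrate the flow):

struct importer_bo {
	struct sg_table *sgt;
};

static void importer_invalidate(struct dma_buf_attachment *attach)
{
	struct importer_bo *bo = attach->importer_priv;

	/* add fences for all hw still using the mapping to the
	 * reservation object first, then unmap right away; the exporter
	 * defers the actual teardown until those fences signaled
	 * (direction hardcoded here just for the sketch) */
	dma_buf_unmap_attachment_locked(attach, bo->sgt, DMA_BIDIRECTIONAL);
	bo->sgt = NULL;
}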

> +	} else {
> +		r = dma_buf_pin(attach);

Hm, I think this changed compared to older versions, and to what we're
de facto doing now with drm prime sgt caching. If you have a dynamic
exporter, but not a dynamic importer, then:
- we will no longer cache the sgt
- pinning will only happen here in attachment_map

Aside from the behaviour change I think this also breaks the locking,
because it forces the reservation_lock into attach_map paths for
non-dynamic importers, which will piss off lockdep.

I think the only option we have is to redefine _is_dynamic as:

dma_buf_attachment_is_dynamic(attach)
{
	return attach->dmabuf->ops->pin && attach->importer_ops->invalidate;
}

I.e. only when both importer and exporter are dynamic can we inflict the
reservation_lock onto them in attachment_map/unmap. For all others we need
to cache the sgt, and if it's a dynamic exporter, pin the buffer
already at attach time.
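
Roughly something like this in dma_buf_dynamic_attach(), using the helper
from above, error handling omitted (untested sketch, not a finished
implementation):

	if (!dma_buf_attachment_is_dynamic(attach)) {
		if (dma_buf_is_dynamic(dmabuf)) {
			/* dynamic exporter, static importer: pin for the
			 * whole lifetime of the attachment */
			reservation_object_lock(dmabuf->resv, NULL);
			ret = dma_buf_pin(attach);
			reservation_object_unlock(dmabuf->resv);
		}
		attach->sgt = dmabuf->ops->map_dma_buf(attach,
						       DMA_BIDIRECTIONAL);
	}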

> +		if (r)
> +			return ERR_PTR(r);
> +	}
> +
> +	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> +	if (attach->importer_ops && attach->importer_ops->invalidate)
> +		list_add(&attach->node, &attach->dmabuf->attachments);
> +	if (!sg_table)
> +		sg_table = ERR_PTR(-ENOMEM);
> +
> +	if (IS_ERR(sg_table)) {
> +		reservation_object_lock(attach->dmabuf->resv, NULL);
> +		dma_buf_unpin(attach);
> +		reservation_object_unlock(attach->dmabuf->resv);
> +	}
> +
> +	return sg_table;
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_map_attachment_locked);
> +
>  /**
>   * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
>   * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
> @@ -651,14 +770,42 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>  	if (attach->sgt)
>  		return attach->sgt;
>  
> -	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> -	if (!sg_table)
> -		sg_table = ERR_PTR(-ENOMEM);
> +	reservation_object_lock(attach->dmabuf->resv, NULL);
> +	sg_table = dma_buf_map_attachment_locked(attach, direction);
> +	reservation_object_unlock(attach->dmabuf->resv);
>  
>  	return sg_table;
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
>  
> +/**
> + * dma_buf_unmap_attachment_locked - unmaps the buffer with reservation lock
> + * held, should deallocate the associated scatterlist. Is a wrapper for
> + * unmap_dma_buf() of dma_buf_ops.
> + * @attach:	[in]	attachment to unmap buffer from
> + * @sg_table:	[in]	scatterlist info of the buffer to unmap
> + * @direction:  [in]    direction of DMA transfer
> + *
> + * This unmaps a DMA mapping for @attach obtained by
> + * dma_buf_map_attachment_locked().
> + */
> +void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
> +				     struct sg_table *sg_table,
> +				     enum dma_data_direction direction)
> +{
> +	might_sleep();
> +	reservation_object_assert_held(attach->dmabuf->resv);
> +
> +	if (attach->sgt == sg_table)
> +		return;
> +
> +	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> +						direction);
> +	if (!(attach->importer_ops && attach->importer_ops->invalidate))
> +		dma_buf_unpin(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment_locked);
> +
>  /**
>   * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
>   * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
> @@ -681,11 +828,32 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>  	if (attach->sgt == sg_table)
>  		return;
>  
> -	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> -						direction);
> +	reservation_object_lock(attach->dmabuf->resv, NULL);
> +	dma_buf_unmap_attachment_locked(attach, sg_table, direction);
> +	reservation_object_unlock(attach->dmabuf->resv);
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>  
> +/**
> + * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
> + *
> + * @dmabuf:	[in]	buffer which mappings should be invalidated
> + *
> + * Informs all attachments that they need to destroy and recreate all their
> + * mappings.
> + */
> +void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
> +{
> +	struct dma_buf_attachment *attach;
> +
> +	reservation_object_assert_held(dmabuf->resv);
> +
> +	list_for_each_entry(attach, &dmabuf->attachments, node)
> +		if (attach->importer_ops && attach->importer_ops->invalidate)
> +			attach->importer_ops->invalidate(attach);
> +}
> +EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
> +
>  /**
>   * DOC: cpu access
>   *
> @@ -1098,10 +1266,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
>  		seq_puts(s, "\tAttached Devices:\n");
>  		attach_count = 0;
>  
> +		reservation_object_lock(buf_obj->resv, NULL);
>  		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
>  			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
>  			attach_count++;
>  		}
> +		reservation_object_unlock(buf_obj->resv);
>  
>  		seq_printf(s, "Total %d devices attached\n\n",
>  				attach_count);
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 52031fdc75bb..f5681ca15d09 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -90,14 +90,40 @@ struct dma_buf_ops {
>  	 */
>  	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
>  
> +	/**
> +	 * @pin:
> +	 *
> +	 * This is called by dma_buf_pin and lets the exporter know that the

dma_buf_pin() so it hyperlinks. Similar everywhere else.

> +	 * DMA-buf can't be moved any more.
> +	 *
> +	 * This is called with the dmabuf->resv object locked.
> +	 *
> +	 * This callback is optional.
> +	 *
> +	 * Returns:
> +	 *
> +	 * 0 on success, negative error code on failure.
> +	 */
> +	int (*pin)(struct dma_buf_attachment *);
> +
> +	/**
> +	 * @unpin:
> +	 *
> +	 * This is called by dma_buf_unpin and lets the exporter know that the
> +	 * DMA-buf can be moved again.
> +	 *
> +	 * This is called with the dmabuf->resv object locked.
> +	 *
> +	 * This callback is optional.
> +	 */
> +	void (*unpin)(struct dma_buf_attachment *);
> +
>  	/**
>  	 * @map_dma_buf:
>  	 *
>  	 * This is called by dma_buf_map_attachment() and is used to map a
>  	 * shared &dma_buf into device address space, and it is mandatory. It
> -	 * can only be called if @attach has been called successfully. This
> -	 * essentially pins the DMA buffer into place, and it cannot be moved
> -	 * any more
> +	 * can only be called if @attach has been called successfully.
>  	 *
>  	 * This call may sleep, e.g. when the backing storage first needs to be
>  	 * allocated, or moved to a location suitable for all currently attached
> @@ -118,6 +144,9 @@ struct dma_buf_ops {
>  	 * any other kind of sharing that the exporter might wish to make
>  	 * available to buffer-users.
>  	 *
> +	 * This is always called with the dmabuf->resv object locked when
> +	 * the pin/unpin callbacks are implemented.
> +	 *
>  	 * Returns:
>  	 *
>  	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
> @@ -135,9 +164,6 @@ struct dma_buf_ops {
>  	 *
>  	 * This is called by dma_buf_unmap_attachment() and should unmap and
>  	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
> -	 * It should also unpin the backing storage if this is the last mapping
> -	 * of the DMA buffer, it the exporter supports backing storage
> -	 * migration.
>  	 */
>  	void (*unmap_dma_buf)(struct dma_buf_attachment *,
>  			      struct sg_table *,
> @@ -302,12 +328,43 @@ struct dma_buf {
>  	} cb_excl, cb_shared;
>  };
>  
> +/**
> + * struct dma_buf_attach_ops - importer operations for an attachment
> + * @invalidate: [optional] invalidate all mappings made using this attachment.

@invalidate twice pisses off kerneldoc.

> + *
> + * Attachment operations implemented by the importer.
> + */
> +struct dma_buf_attach_ops {
> +	/**
> +	 * @invalidate:
> +	 *
> +	 * If the invalidate callback is provided the framework can avoid
> +	 * pinning the backing store while mappings exists.
> +	 *
> +	 * This callback is called with the lock of the reservation object
> +	 * associated with the dma_buf held and the mapping function must be
> +	 * called with this lock held as well. This makes sure that no mapping
> +	 * is created concurrently with an ongoing invalidation.
> +	 *
> +	 * After the callback all existing mappings are still valid until all
> +	 * fences in the dma_bufs reservation object are signaled. After getting an
> +	 * invalidation callback all mappings should be destroyed by the importer using
> +	 * the normal dma_buf_unmap_attachment() function as soon as possible.

I think "as soon as possible" is a bit vague. What breaks if we don't do
that? My suggestion to immediately call unmap (but with an explicit or
implicit fence) would resolve this question much more clearly, by offloading
the duty of "unmapping soon enough" to the exporter.

This also better matches how dynamic_map works: the mapping is only valid
once the explicit fence has signaled, not right away. Doing a similar
"unmap is only ok once the fences have signaled" sounds reasonable to me.
Aside: If we pass an explicit fence to dynamic_unmap, it would make sense to
receive an explicit fence from dynamic_map. Passing explicit fences around
also feels a notch cleaner given all the headaches we already have with
leaking implicit fences into reservation objects and causing us trouble
there. Explicit dynamic_map/unmap fences would allow an exporter to untangle
these two things (atm not possible, just prep for the future).
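
Strawman signatures for that, purely illustrative:

struct sg_table *
dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
			      enum dma_data_direction direction,
			      struct dma_fence **fence);
/* the mapping may only be used once *fence signaled */

void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
				     struct sg_table *sg_table,
				     enum dma_data_direction direction,
				     struct dma_fence *fence);
/* the exporter keeps the mapping alive until fence signaled */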

Aside: For a non-dynamic importer the initial pin/sgt caching also needs to
block for that exclusive fence I think, if it's a dynamic exporter. Not
all drivers wait for the exclusive fence (v4l probably more than drm), so
if the exporter is still moving the buffer we might have a problem.

Or is the pin supposed to be synchronous? Would be another thing to
document I guess ...
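
If it is meant to be synchronous, maybe something like this in
dma_buf_pin() (completely untested, just to sketch the idea):

	long lret;

	ret = dmabuf->ops->pin(attach);
	if (ret)
		return ret;

	/* wait for a dynamic exporter to be done moving the buffer
	 * before anyone relies on the pinned location */
	lret = reservation_object_wait_timeout_rcu(dmabuf->resv, false,
						   true,
						   MAX_SCHEDULE_TIMEOUT);
	return lret < 0 ? lret : 0;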

> +	 *
> +	 * New mappings can be created immediately, but can't be used before the
> +	 * exclusive fence in the dma_bufs reservation object is signaled.

I guess you can't go right ahead and create a new mapping from the
->invalidate hook?

Need to document that, and maybe enforce?

> +	 */
> +	void (*invalidate)(struct dma_buf_attachment *attach);
> +};
> +
>  /**
>   * struct dma_buf_attachment - holds device-buffer attachment data
>   * @dmabuf: buffer for this attachment.
>   * @dev: device attached to the buffer.
>   * @node: list of dma_buf_attachment.
>   * @priv: exporter specific attachment data.
> + * @importer_ops: importer operations for this attachment.
> + * @importer_priv: importer specific attachment data.
>   *
>   * This structure holds the attachment information between the dma_buf buffer
>   * and its user device(s). The list contains one attachment struct per device
> @@ -324,6 +381,9 @@ struct dma_buf_attachment {
>  	struct list_head node;
>  	struct sg_table *sgt;
>  	void *priv;
> +
> +	const struct dma_buf_attach_ops *importer_ops;
> +	void *importer_priv;
>  };
>  
>  /**
> @@ -383,14 +443,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
>   */
>  static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
>  {
> -	/* Always use a static mapping for now */
> -	return false;
> +	return !!dmabuf->ops->pin;

!! is redundant, the compiler does that for you when casting to bool.

>  }
>  
>  struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> -							struct device *dev);
> +					  struct device *dev);
> +struct dma_buf_attachment *
> +dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
> +		       const struct dma_buf_attach_ops *importer_ops,
> +		       void *importer_priv);
>  void dma_buf_detach(struct dma_buf *dmabuf,
> -				struct dma_buf_attachment *dmabuf_attach);
> +		    struct dma_buf_attachment *attach);
> +int dma_buf_pin(struct dma_buf_attachment *attach);
> +void dma_buf_unpin(struct dma_buf_attachment *attach);
>  
>  struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
>  
> @@ -398,10 +463,16 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags);
>  struct dma_buf *dma_buf_get(int fd);
>  void dma_buf_put(struct dma_buf *dmabuf);
>  
> +struct sg_table *dma_buf_map_attachment_locked(struct dma_buf_attachment *,
> +					       enum dma_data_direction);
>  struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
>  					enum dma_data_direction);
> +void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *,
> +				     struct sg_table *,
> +				     enum dma_data_direction);
>  void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
>  				enum dma_data_direction);
> +void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
>  int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
>  			     enum dma_data_direction dir);
>  int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
> -- 
> 2.17.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/9] drm: remove prime sg_table caching
  2019-05-07  8:13 ` [PATCH 3/9] drm: remove prime sg_table caching Christian König
@ 2019-05-08  9:57   ` Daniel Vetter
  2019-05-08 11:19     ` Christian König
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2019-05-08  9:57 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Tue, May 07, 2019 at 10:13:32AM +0200, Christian König wrote:
> That is now done by the DMA-buf helpers instead.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/drm_prime.c | 76 ++++++++-----------------------------
>  1 file changed, 16 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
> index 231e3f6d5f41..90f5230cc0d5 100644
> --- a/drivers/gpu/drm/drm_prime.c
> +++ b/drivers/gpu/drm/drm_prime.c
> @@ -86,11 +86,6 @@ struct drm_prime_member {
>  	struct rb_node handle_rb;
>  };
>  
> -struct drm_prime_attachment {
> -	struct sg_table *sgt;
> -	enum dma_data_direction dir;
> -};
> -
>  static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
>  				    struct dma_buf *dma_buf, uint32_t handle)
>  {
> @@ -188,25 +183,16 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
>   * @dma_buf: buffer to attach device to
>   * @attach: buffer attachment data
>   *
> - * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
> - * device specific attachment. This can be used as the &dma_buf_ops.attach
> - * callback.
> + * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
> + * used as the &dma_buf_ops.attach callback.
>   *
>   * Returns 0 on success, negative error code on failure.
>   */
>  int drm_gem_map_attach(struct dma_buf *dma_buf,
>  		       struct dma_buf_attachment *attach)
>  {
> -	struct drm_prime_attachment *prime_attach;
>  	struct drm_gem_object *obj = dma_buf->priv;
>  
> -	prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
> -	if (!prime_attach)
> -		return -ENOMEM;
> -
> -	prime_attach->dir = DMA_NONE;
> -	attach->priv = prime_attach;
> -
>  	return drm_gem_pin(obj);
>  }
>  EXPORT_SYMBOL(drm_gem_map_attach);
> @@ -222,26 +208,8 @@ EXPORT_SYMBOL(drm_gem_map_attach);
>  void drm_gem_map_detach(struct dma_buf *dma_buf,
>  			struct dma_buf_attachment *attach)
>  {
> -	struct drm_prime_attachment *prime_attach = attach->priv;
>  	struct drm_gem_object *obj = dma_buf->priv;
>  
> -	if (prime_attach) {
> -		struct sg_table *sgt = prime_attach->sgt;
> -
> -		if (sgt) {
> -			if (prime_attach->dir != DMA_NONE)
> -				dma_unmap_sg_attrs(attach->dev, sgt->sgl,
> -						   sgt->nents,
> -						   prime_attach->dir,
> -						   DMA_ATTR_SKIP_CPU_SYNC);
> -			sg_free_table(sgt);
> -		}
> -
> -		kfree(sgt);
> -		kfree(prime_attach);
> -		attach->priv = NULL;
> -	}
> -
>  	drm_gem_unpin(obj);
>  }
>  EXPORT_SYMBOL(drm_gem_map_detach);
> @@ -286,39 +254,22 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
>  struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
>  				     enum dma_data_direction dir)
>  {
> -	struct drm_prime_attachment *prime_attach = attach->priv;
>  	struct drm_gem_object *obj = attach->dmabuf->priv;
>  	struct sg_table *sgt;
>  
> -	if (WARN_ON(dir == DMA_NONE || !prime_attach))
> +	if (WARN_ON(dir == DMA_NONE))
>  		return ERR_PTR(-EINVAL);
>  
> -	/* return the cached mapping when possible */
> -	if (prime_attach->dir == dir)
> -		return prime_attach->sgt;
> -
> -	/*
> -	 * two mappings with different directions for the same attachment are
> -	 * not allowed
> -	 */
> -	if (WARN_ON(prime_attach->dir != DMA_NONE))
> -		return ERR_PTR(-EBUSY);
> -
>  	if (obj->funcs)
>  		sgt = obj->funcs->get_sg_table(obj);
>  	else
>  		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
>  
> -	if (!IS_ERR(sgt)) {
> -		if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
> -				      DMA_ATTR_SKIP_CPU_SYNC)) {
> -			sg_free_table(sgt);
> -			kfree(sgt);
> -			sgt = ERR_PTR(-ENOMEM);
> -		} else {
> -			prime_attach->sgt = sgt;
> -			prime_attach->dir = dir;
> -		}
> +	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
> +			      DMA_ATTR_SKIP_CPU_SYNC)) {
> +		sg_free_table(sgt);
> +		kfree(sgt);
> +		sgt = ERR_PTR(-ENOMEM);
>  	}
>  
>  	return sgt;
> @@ -331,14 +282,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
>   * @sgt: scatterlist info of the buffer to unmap
>   * @dir: direction of DMA transfer
>   *
> - * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
> - * used as the &dma_buf_ops.unmap_dma_buf callback.
> + * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
>   */
>  void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
>  			   struct sg_table *sgt,
>  			   enum dma_data_direction dir)
>  {
> -	/* nothing to be done here */
> +	if (!sgt)
> +		return;

This copypasta isn't needed anymore, it's a caller bug if you don't supply
an sgt here. The old caching code only needed it because it only cached
on-demand in the map callback. With that:

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>


> +
> +	dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
> +			   DMA_ATTR_SKIP_CPU_SYNC);
> +	sg_free_table(sgt);
> +	kfree(sgt);
>  }
>  EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
>  
> -- 
> 2.17.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
  2019-05-08  9:15 ` [PATCH 1/9] dma-buf: start caching of sg_table objects Daniel Vetter
@ 2019-05-08 10:11   ` Daniel Vetter
  2019-05-08 12:00     ` Christian König
  2019-05-08 11:13   ` Christian König
  1 sibling, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2019-05-08 10:11 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Wed, May 08, 2019 at 11:15:33AM +0200, Daniel Vetter wrote:
> On Tue, May 07, 2019 at 10:13:30AM +0200, Christian König wrote:
> > To allow a smooth transition from pinning buffer objects to dynamic
> > invalidation we first start to cache the sg_table for an attachment.
> > 
> > Signed-off-by: Christian König <christian.koenig@amd.com>
> > ---
> >  drivers/dma-buf/dma-buf.c | 24 ++++++++++++++++++++++++
> >  include/linux/dma-buf.h   | 14 ++++++++++++++
> >  2 files changed, 38 insertions(+)
> > 
> > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> > index 7c858020d14b..775e13f54083 100644
> > --- a/drivers/dma-buf/dma-buf.c
> > +++ b/drivers/dma-buf/dma-buf.c
> > @@ -573,6 +573,20 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> >  	list_add(&attach->node, &dmabuf->attachments);
> >  
> >  	mutex_unlock(&dmabuf->lock);
> > +
> > +	if (!dma_buf_is_dynamic(dmabuf)) {
> > +		struct sg_table *sgt;
> > +
> > +		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
> > +		if (!sgt)
> > +			sgt = ERR_PTR(-ENOMEM);
> > +		if (IS_ERR(sgt)) {
> > +			dma_buf_detach(dmabuf, attach);
> > +			return ERR_CAST(sgt);
> > +		}
> > +		attach->sgt = sgt;
> > +	}
> > +
> >  	return attach;
> >  
> >  err_attach:
> > @@ -595,6 +609,10 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
> >  	if (WARN_ON(!dmabuf || !attach))
> >  		return;
> >  
> > +	if (attach->sgt)
> > +		dmabuf->ops->unmap_dma_buf(attach, attach->sgt,
> > +					   DMA_BIDIRECTIONAL);
> > +
> >  	mutex_lock(&dmabuf->lock);
> >  	list_del(&attach->node);
> >  	if (dmabuf->ops->detach)
> > @@ -630,6 +648,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
> >  	if (WARN_ON(!attach || !attach->dmabuf))
> >  		return ERR_PTR(-EINVAL);
> >  
> > +	if (attach->sgt)
> > +		return attach->sgt;
> > +
> >  	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> >  	if (!sg_table)
> >  		sg_table = ERR_PTR(-ENOMEM);
> > @@ -657,6 +678,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
> >  	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
> >  		return;
> >  
> > +	if (attach->sgt == sg_table)
> > +		return;
> > +
> >  	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> >  						direction);
> >  }
> > diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> > index 58725f890b5b..52031fdc75bb 100644
> > --- a/include/linux/dma-buf.h
> > +++ b/include/linux/dma-buf.h
> > @@ -322,6 +322,7 @@ struct dma_buf_attachment {
> >  	struct dma_buf *dmabuf;
> >  	struct device *dev;
> >  	struct list_head node;
> > +	struct sg_table *sgt;
> >  	void *priv;
> >  };
> >  
> > @@ -373,6 +374,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
> >  	get_file(dmabuf->file);
> >  }
> >  
> > +/**
> > + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
> > + * @dmabuf: the DMA-buf to check
> > + *
> > + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
> > + * for each attachment. False if only a single static sg table should be used.
> > + */
> > +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
> > +{
> > +	/* Always use a static mapping for now */
> > +	return false;
> 
> Hm I still expect that later on we'll want this to be decided by the
> attachment: It's only dynamic if both the exporter and the importer
> support dynamic dma-buf management, otherwise we need to pin.
> 
> But anyway, I feel like we need to go over the entire thing anyway once
> more when p2p has landed, on this:
> 
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Correction that I only just realized, but need to retract that r-b on this
and the drm sgt cache removal patch: You now hardcode the direction to
DMA_BIDIRECTIONAL, while drm_prime only kept the cache for a given
direction.

Now x86 drivers always set DMA_BIDIRECTIONAL, but arm soc drivers (and
also v4l videobuf layer) try to guess whether it should be DMA_TO_DEVICE
or DMA_FROM_DEVICE. I have no idea what the implications are, and all the
cache coherency on dma-bufs is kinda ill-defined.

And we can't throw the sgt cache away at map time if it doesn't fit like
drm_prime does, because that reintroduces the reservation object lock,
defeating the entire purpose of this. Also we can't just assume drm_prime
works for everyone, since the special cases all roll their own dma-buf
import. I have also not checked what exactly exporters do. No idea what to
do here now.

/me cries
-Daniel

> 
> > +}
> > +
> >  struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> >  							struct device *dev);
> >  void dma_buf_detach(struct dma_buf *dmabuf,
> > -- 
> > 2.17.1
> > 
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
  2019-05-08  9:15 ` [PATCH 1/9] dma-buf: start caching of sg_table objects Daniel Vetter
  2019-05-08 10:11   ` Daniel Vetter
@ 2019-05-08 11:13   ` Christian König
  1 sibling, 0 replies; 20+ messages in thread
From: Christian König @ 2019-05-08 11:13 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

On 08.05.19 at 11:15, Daniel Vetter wrote:
> On Tue, May 07, 2019 at 10:13:30AM +0200, Christian König wrote:
>> To allow a smooth transition from pinning buffer objects to dynamic
>> invalidation we first start to cache the sg_table for an attachment.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/dma-buf/dma-buf.c | 24 ++++++++++++++++++++++++
>>   include/linux/dma-buf.h   | 14 ++++++++++++++
>>   2 files changed, 38 insertions(+)
>>
>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>> index 7c858020d14b..775e13f54083 100644
>> --- a/drivers/dma-buf/dma-buf.c
>> +++ b/drivers/dma-buf/dma-buf.c
>> @@ -573,6 +573,20 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>>   	list_add(&attach->node, &dmabuf->attachments);
>>   
>>   	mutex_unlock(&dmabuf->lock);
>> +
>> +	if (!dma_buf_is_dynamic(dmabuf)) {
>> +		struct sg_table *sgt;
>> +
>> +		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
>> +		if (!sgt)
>> +			sgt = ERR_PTR(-ENOMEM);
>> +		if (IS_ERR(sgt)) {
>> +			dma_buf_detach(dmabuf, attach);
>> +			return ERR_CAST(sgt);
>> +		}
>> +		attach->sgt = sgt;
>> +	}
>> +
>>   	return attach;
>>   
>>   err_attach:
>> @@ -595,6 +609,10 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>>   	if (WARN_ON(!dmabuf || !attach))
>>   		return;
>>   
>> +	if (attach->sgt)
>> +		dmabuf->ops->unmap_dma_buf(attach, attach->sgt,
>> +					   DMA_BIDIRECTIONAL);
>> +
>>   	mutex_lock(&dmabuf->lock);
>>   	list_del(&attach->node);
>>   	if (dmabuf->ops->detach)
>> @@ -630,6 +648,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>>   	if (WARN_ON(!attach || !attach->dmabuf))
>>   		return ERR_PTR(-EINVAL);
>>   
>> +	if (attach->sgt)
>> +		return attach->sgt;
>> +
>>   	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
>>   	if (!sg_table)
>>   		sg_table = ERR_PTR(-ENOMEM);
>> @@ -657,6 +678,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>>   	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
>>   		return;
>>   
>> +	if (attach->sgt == sg_table)
>> +		return;
>> +
>>   	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
>>   						direction);
>>   }
>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
>> index 58725f890b5b..52031fdc75bb 100644
>> --- a/include/linux/dma-buf.h
>> +++ b/include/linux/dma-buf.h
>> @@ -322,6 +322,7 @@ struct dma_buf_attachment {
>>   	struct dma_buf *dmabuf;
>>   	struct device *dev;
>>   	struct list_head node;
>> +	struct sg_table *sgt;
>>   	void *priv;
>>   };
>>   
>> @@ -373,6 +374,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
>>   	get_file(dmabuf->file);
>>   }
>>   
>> +/**
>> + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
>> + * @dmabuf: the DMA-buf to check
>> + *
>> + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
>> + * for each attachment. False if only a single static sg table should be used.
>> + */
>> +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
>> +{
>> +	/* Always use a static mapping for now */
>> +	return false;
> Hm I still expect that later on we'll want this to be decided by the
> attachment: It's only dynamic if both the exporter and the importer
> support dynamic dma-buf management, otherwise we need to pin.

Yeah, essentially we need to handle the following cases:

1. Static DMA-buf and the exporter doesn't want caching.
2. Static DMA-buf and the exporter does want caching.
3. Dynamic DMA-buf and we use caching as a workaround for smooth transition.
4. Dynamic DMA-buf and caching doesn't make much sense.

So far we only support 3 & 4 and not 1 & 2. It could be that this gets much
more complicated, but I would only want to add that extra complexity if
we really find that we need it.
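
If it turns out we do, moving the check to the attachment should cover
all four cases. Only a sketch:

static inline bool
dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach)
{
	/* only case 4 is truly dynamic, everything else keeps the
	 * cached static sg table */
	return attach->dmabuf->ops->pin && attach->importer_ops &&
	       attach->importer_ops->invalidate;
}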

> But anyway, I feel like we need to go over the entire thing anyway once
> more when p2p has landed, on this:
>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thanks for the review. Next question: how do we merge this? Through drm-misc?

Regards,
Christian.

>
>> +}
>> +
>>   struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>>   							struct device *dev);
>>   void dma_buf_detach(struct dma_buf *dmabuf,
>> -- 
>> 2.17.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/9] drm: remove prime sg_table caching
  2019-05-08  9:57   ` Daniel Vetter
@ 2019-05-08 11:19     ` Christian König
  0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2019-05-08 11:19 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

On 08.05.19 at 11:57, Daniel Vetter wrote:
> On Tue, May 07, 2019 at 10:13:32AM +0200, Christian König wrote:
>> That is now done by the DMA-buf helpers instead.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/drm_prime.c | 76 ++++++++-----------------------------
>>   1 file changed, 16 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
>> index 231e3f6d5f41..90f5230cc0d5 100644
>> --- a/drivers/gpu/drm/drm_prime.c
>> +++ b/drivers/gpu/drm/drm_prime.c
>> @@ -86,11 +86,6 @@ struct drm_prime_member {
>>   	struct rb_node handle_rb;
>>   };
>>   
>> -struct drm_prime_attachment {
>> -	struct sg_table *sgt;
>> -	enum dma_data_direction dir;
>> -};
>> -
>>   static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
>>   				    struct dma_buf *dma_buf, uint32_t handle)
>>   {
>> @@ -188,25 +183,16 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
>>    * @dma_buf: buffer to attach device to
>>    * @attach: buffer attachment data
>>    *
>> - * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
>> - * device specific attachment. This can be used as the &dma_buf_ops.attach
>> - * callback.
>> + * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
>> + * used as the &dma_buf_ops.attach callback.
>>    *
>>    * Returns 0 on success, negative error code on failure.
>>    */
>>   int drm_gem_map_attach(struct dma_buf *dma_buf,
>>   		       struct dma_buf_attachment *attach)
>>   {
>> -	struct drm_prime_attachment *prime_attach;
>>   	struct drm_gem_object *obj = dma_buf->priv;
>>   
>> -	prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
>> -	if (!prime_attach)
>> -		return -ENOMEM;
>> -
>> -	prime_attach->dir = DMA_NONE;
>> -	attach->priv = prime_attach;
>> -
>>   	return drm_gem_pin(obj);
>>   }
>>   EXPORT_SYMBOL(drm_gem_map_attach);
>> @@ -222,26 +208,8 @@ EXPORT_SYMBOL(drm_gem_map_attach);
>>   void drm_gem_map_detach(struct dma_buf *dma_buf,
>>   			struct dma_buf_attachment *attach)
>>   {
>> -	struct drm_prime_attachment *prime_attach = attach->priv;
>>   	struct drm_gem_object *obj = dma_buf->priv;
>>   
>> -	if (prime_attach) {
>> -		struct sg_table *sgt = prime_attach->sgt;
>> -
>> -		if (sgt) {
>> -			if (prime_attach->dir != DMA_NONE)
>> -				dma_unmap_sg_attrs(attach->dev, sgt->sgl,
>> -						   sgt->nents,
>> -						   prime_attach->dir,
>> -						   DMA_ATTR_SKIP_CPU_SYNC);
>> -			sg_free_table(sgt);
>> -		}
>> -
>> -		kfree(sgt);
>> -		kfree(prime_attach);
>> -		attach->priv = NULL;
>> -	}
>> -
>>   	drm_gem_unpin(obj);
>>   }
>>   EXPORT_SYMBOL(drm_gem_map_detach);
>> @@ -286,39 +254,22 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
>>   struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
>>   				     enum dma_data_direction dir)
>>   {
>> -	struct drm_prime_attachment *prime_attach = attach->priv;
>>   	struct drm_gem_object *obj = attach->dmabuf->priv;
>>   	struct sg_table *sgt;
>>   
>> -	if (WARN_ON(dir == DMA_NONE || !prime_attach))
>> +	if (WARN_ON(dir == DMA_NONE))
>>   		return ERR_PTR(-EINVAL);
>>   
>> -	/* return the cached mapping when possible */
>> -	if (prime_attach->dir == dir)
>> -		return prime_attach->sgt;
>> -
>> -	/*
>> -	 * two mappings with different directions for the same attachment are
>> -	 * not allowed
>> -	 */
>> -	if (WARN_ON(prime_attach->dir != DMA_NONE))
>> -		return ERR_PTR(-EBUSY);
>> -
>>   	if (obj->funcs)
>>   		sgt = obj->funcs->get_sg_table(obj);
>>   	else
>>   		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
>>   
>> -	if (!IS_ERR(sgt)) {
>> -		if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
>> -				      DMA_ATTR_SKIP_CPU_SYNC)) {
>> -			sg_free_table(sgt);
>> -			kfree(sgt);
>> -			sgt = ERR_PTR(-ENOMEM);
>> -		} else {
>> -			prime_attach->sgt = sgt;
>> -			prime_attach->dir = dir;
>> -		}
>> +	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
>> +			      DMA_ATTR_SKIP_CPU_SYNC)) {
>> +		sg_free_table(sgt);
>> +		kfree(sgt);
>> +		sgt = ERR_PTR(-ENOMEM);
>>   	}
>>   
>>   	return sgt;
>> @@ -331,14 +282,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
>>    * @sgt: scatterlist info of the buffer to unmap
>>    * @dir: direction of DMA transfer
>>    *
>> - * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
>> - * used as the &dma_buf_ops.unmap_dma_buf callback.
>> + * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
>>    */
>>   void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
>>   			   struct sg_table *sgt,
>>   			   enum dma_data_direction dir)
>>   {
>> -	/* nothing to be done here */
>> +	if (!sgt)
>> +		return;
> This copypasta isn't needed anymore, it's a caller bug if you don't supply
> an sgt here. The old caching code only needed it because it only cached
> on-demand in the map callback. With that:
>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thanks for clearing that up, I was already wondering where that stuff 
came from.

In this case I'm going to remove the extra NULL checks from the amdgpu code
as well.

Christian.

>
>
>> +
>> +	dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
>> +			   DMA_ATTR_SKIP_CPU_SYNC);
>> +	sg_free_table(sgt);
>> +	kfree(sgt);
>>   }
>>   EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
>>   
>> -- 
>> 2.17.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
  2019-05-08 10:11   ` Daniel Vetter
@ 2019-05-08 12:00     ` Christian König
  2019-05-08 12:15       ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Christian König @ 2019-05-08 12:00 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

On 08.05.19 at 12:11, Daniel Vetter wrote:
> On Wed, May 08, 2019 at 11:15:33AM +0200, Daniel Vetter wrote:
>> On Tue, May 07, 2019 at 10:13:30AM +0200, Christian König wrote:
>>> To allow a smooth transition from pinning buffer objects to dynamic
>>> invalidation we first start to cache the sg_table for an attachment.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/dma-buf/dma-buf.c | 24 ++++++++++++++++++++++++
>>>   include/linux/dma-buf.h   | 14 ++++++++++++++
>>>   2 files changed, 38 insertions(+)
>>>
>>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>>> index 7c858020d14b..775e13f54083 100644
>>> --- a/drivers/dma-buf/dma-buf.c
>>> +++ b/drivers/dma-buf/dma-buf.c
>>> @@ -573,6 +573,20 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>>>   	list_add(&attach->node, &dmabuf->attachments);
>>>   
>>>   	mutex_unlock(&dmabuf->lock);
>>> +
>>> +	if (!dma_buf_is_dynamic(dmabuf)) {
>>> +		struct sg_table *sgt;
>>> +
>>> +		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
>>> +		if (!sgt)
>>> +			sgt = ERR_PTR(-ENOMEM);
>>> +		if (IS_ERR(sgt)) {
>>> +			dma_buf_detach(dmabuf, attach);
>>> +			return ERR_CAST(sgt);
>>> +		}
>>> +		attach->sgt = sgt;
>>> +	}
>>> +
>>>   	return attach;
>>>   
>>>   err_attach:
>>> @@ -595,6 +609,10 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>>>   	if (WARN_ON(!dmabuf || !attach))
>>>   		return;
>>>   
>>> +	if (attach->sgt)
>>> +		dmabuf->ops->unmap_dma_buf(attach, attach->sgt,
>>> +					   DMA_BIDIRECTIONAL);
>>> +
>>>   	mutex_lock(&dmabuf->lock);
>>>   	list_del(&attach->node);
>>>   	if (dmabuf->ops->detach)
>>> @@ -630,6 +648,9 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>>>   	if (WARN_ON(!attach || !attach->dmabuf))
>>>   		return ERR_PTR(-EINVAL);
>>>   
>>> +	if (attach->sgt)
>>> +		return attach->sgt;
>>> +
>>>   	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
>>>   	if (!sg_table)
>>>   		sg_table = ERR_PTR(-ENOMEM);
>>> @@ -657,6 +678,9 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>>>   	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
>>>   		return;
>>>   
>>> +	if (attach->sgt == sg_table)
>>> +		return;
>>> +
>>>   	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
>>>   						direction);
>>>   }
>>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
>>> index 58725f890b5b..52031fdc75bb 100644
>>> --- a/include/linux/dma-buf.h
>>> +++ b/include/linux/dma-buf.h
>>> @@ -322,6 +322,7 @@ struct dma_buf_attachment {
>>>   	struct dma_buf *dmabuf;
>>>   	struct device *dev;
>>>   	struct list_head node;
>>> +	struct sg_table *sgt;
>>>   	void *priv;
>>>   };
>>>   
>>> @@ -373,6 +374,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
>>>   	get_file(dmabuf->file);
>>>   }
>>>   
>>> +/**
>>> + * dma_buf_is_dynamic - check if a DMA-buf uses dynamic mappings.
>>> + * @dmabuf: the DMA-buf to check
>>> + *
>>> + * Returns true if a DMA-buf exporter wants to create dynamic sg table mappings
>>> + * for each attachment. False if only a single static sg table should be used.
>>> + */
>>> +static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
>>> +{
>>> +	/* Always use a static mapping for now */
>>> +	return false;
>> Hm I still expect that later on we'll want this to be decided by the
>> attachment: It's only dynamic if both the exporter and the importer
>> support dynamic dma-buf management, otherwise we need to pin.
>>
>> But anyway, I feel like we need to go over the entire thing anyway once
>> more when p2p has landed, on this:
>>
>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> Correction that I only just realized, but need to retract that r-b on this
> and the drm sgt cache removal patch: You now hardcode the direction to
> DMA_BIDIRECTIONAL, drm_prime did only keep the cache for a given
> direction.
>
> Now x86 drivers always set DMA_BIDIRECTIONAL, but arm soc drivers (and
> also v4l videobuf layer) try to guess whether it should be DMA_TO_DEVICE
> or DMA_FROM_DEVICE. I have no idea what the implications are, also all the
> cache coherency on dma-bufs is kinda ill-defined.
>
> And we can't throw the sgt cache away at map time if it doesn't fit like
> drm_prime does, because that reintroduces the reservation object lock,
> defeating the entire purpose of this. Also we can't just assume drm_prime
> works for everyone, since the special cases all roll their own dma-buf
> import. I have also not checked what exactly exporters do. No idea what to
> do here now.
>
> /me cries

Well, it is kind of an abuse to use the DMA direction here in the first
place, and I actually considered removing it completely.

The key problem is that the DMA direction defines things from the view 
point of the CPU, e.g. DMA_TO_DEVICE means the data has been accessed by 
the CPU and we are now pushing it to a device.

But DMA-buf is essentially about device to device transfers, so 
DMA_TO_DEVICE as well as DMA_FROM_DEVICE is completely ambiguous because 
you don't know the point of view to start with.
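
To illustrate with the streaming API, where the CPU point of view is
clear (only an example):

	/* the CPU wrote the buffer, the device is going to read it */
	dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);

	/* the device is going to write it, the CPU reads it afterwards */
	dma_map_sg(dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);

With a device to device transfer you simply can't say which of the two
applies.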

In addition to that, the caching implementation in drm_prime.c didn't
handle it fully either:
> -	/*
> -	 * two mappings with different directions for the same attachment are
> -	 * not allowed
> -	 */
> -	if (WARN_ON(prime_attach->dir != DMA_NONE))
> -		return ERR_PTR(-EBUSY);

Do the ARM guys and V4L guys actually do anything with the direction
except for passing it on to dma_map_sg_attrs()?

Regards,
Christian.

> -Daniel
>
>>> +}
>>> +
>>>   struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>>>   							struct device *dev);
>>>   void dma_buf_detach(struct dma_buf *dmabuf,
>>> -- 
>>> 2.17.1
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
  2019-05-08 12:00     ` Christian König
@ 2019-05-08 12:15       ` Daniel Vetter
  2019-05-08 12:23         ` Koenig, Christian
  0 siblings, 1 reply; 20+ messages in thread
From: Daniel Vetter @ 2019-05-08 12:15 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Wed, May 8, 2019 at 2:00 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
> Am 08.05.19 um 12:11 schrieb Daniel Vetter:
> > [snip]
>
> Well, it is kind of an abuse to use the DMA direction here in the first
> place, and I actually considered removing it completely.
>
> The key problem is that the DMA direction defines things from the
> viewpoint of the CPU, e.g. DMA_TO_DEVICE means the data has been accessed
> by the CPU and we are now pushing it to a device.
>
> But DMA-buf is essentially about device to device transfers, so
> DMA_TO_DEVICE as well as DMA_FROM_DEVICE are completely ambiguous because
> you don't know which point of view to start from.
>
> In addition to that, the caching implementation in drm_prime.c didn't
> handle it fully either:
> > -     /*
> > -      * two mappings with different directions for the same attachment are
> > -      * not allowed
> > -      */
> > -     if (WARN_ON(prime_attach->dir != DMA_NONE))
> > -             return ERR_PTR(-EBUSY);
>
> Do the ARM guys and V4L guys actually do anything with the direction
> except for passing it on to dma_map_sg_attrs()?

I don't think so, but that alone is a big thing because on arm, that
actually changes what happens wrt flushing and stuff. On x86 the dma
subsystem assumes everything is always coherent, and we have a pile of
hacks in drivers to make sure the dma access is coherent enough (since
on a gpu this is controlled on a per transaction basis usually, fairly
often even under userspace's control).

I have tried to better understand what a better dma api interface
would look like for gpus and dma-buf, but every time that comes up the
dma maintainers tell me I'm a fool and that this _must_ be abstracted
away by the dma api and I give up again. Hence the crying and no idea
what to do here :-/

In reality it probably doesn't matter, as long as no one tries to run
a gpu driver with dynamic dma-buf export on arm, but I've seen some
people trying to get amdgpu at least going in that combo. So not a
long-term solution either. And we can't just not cache, because that
breaks the world wrt locking ordering I think, or at least a bunch of
drm drivers (and not all drm drivers use the drm_prime helpers, so we
can't just do this for us and not cache for anyone else and hope that
works out).

One option I haven't looked into much, but might work, is that we call
the dma api coherency functions if the requested attachment_map/unmap
direction doesn't match. But I honestly don't know whether that's
allowed and would result in the same thing in the dma-api backends as
doing a mapping with the desired direction right away. Also, it would
terribly break the layering, so we'd need to add a map_sync callback
(there's been patches, but they never landed) and roll them out to all
exporters.
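
Very roughly something like this (hypothetical sketch from memory, not
the exact interface those patches used):

	/* hypothetical new callback in struct dma_buf_ops */
	void (*map_sync)(struct dma_buf_attachment *attach,
			 struct sg_table *sgt,
			 enum dma_data_direction direction);

	/* dma_buf_map_attachment() could then do something like this */
	if (attach->sgt) {
		if (direction != DMA_BIDIRECTIONAL &&
		    attach->dmabuf->ops->map_sync)
			attach->dmabuf->ops->map_sync(attach, attach->sgt,
						      direction);
		return attach->sgt;
	}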

Or maybe just back to crying :-)
-Daniel


> Regards,
> Christian.


-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
  2019-05-08 12:15       ` Daniel Vetter
@ 2019-05-08 12:23         ` Koenig, Christian
  2019-05-08 12:44           ` Daniel Vetter
  0 siblings, 1 reply; 20+ messages in thread
From: Koenig, Christian @ 2019-05-08 12:23 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

On 08.05.19 at 14:15, Daniel Vetter wrote:
> [snip]
>
>> Do the ARM guys and V4L guys actually do anything with the direction
>> except for passing it on to dma_map_sg_attrs()?
> I don't think so, but that alone is a big thing because on arm, that
> actually changes what happens wrt flushing and stuff. On x86 the dma
> subsystem assumes everything is always coherent, and we have a pile of
> hacks in drivers to make sure the dma access is coherent enough (since
> on a gpu this is controlled on a per transaction basis usually, fairly
> often even under userspace's control).
>
> I have tried to better understand what a better dma api interface
> would look like for gpus and dma-buf, but every time that comes up the
> dma maintainers tell me I'm a fool and that this _must_ be abstracted
> away by the dma api and I give up again. Hence the crying and no idea
> what to do here :-/

Well, count me among the people who think that the DMA-API on Linux is
not a good fit for GPUs.

E.g. I've tried to explain multiple times now that we would rather have
a failed mapping than bounce buffers, but that doesn't seem to sink in.
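
What I would like is something like this instead (purely hypothetical,
a DMA_ATTR_NO_BOUNCE attribute doesn't exist today):

	/* hypothetical attribute: fail instead of using a bounce buffer */
	nents = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents,
				 DMA_BIDIRECTIONAL, DMA_ATTR_NO_BOUNCE);
	if (!nents)
		return -ENOMEM; /* the driver could then migrate the buffer */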

> In reality it probably doesn't matter, as long as no one tries to run
> a gpu driver with dynamic dma-buf export on arm, but I've seen some
> people trying to get amdgpu at least going in that combo. So not a
> long-term solution either. And we can't just not cache, because that
> breaks the world wrt locking ordering I think, or at least a bunch of
> drm drivers (and not all drm drivers use the drm_prime helpers, so we
> can't just do this for us and not cache for anyone else and hope that
> works out).

Well, the amdgpu userspace stack absolutely needs coherent mappings and
won't work otherwise. So I'm not worried about amdgpu.

But this doesn't mean that we can ignore it for ARM.

> One option I haven't looked into much, but might work, is that we call
> the dma api coherency functions if the requested attachment_map/unmap
> direction doesn't match. But I honestly don't know whether that's
> allowed and would result in the same thing in the dma-api backends as
> doing a mapping with the desired direction right away. Also, it would
> terribly break the layering, so we'd need to add a map_sync callback
> (there's been patches, but they never landed) and roll them out to all
> exporters.

Well no, not really. I think I will just go down the route of making sgt
caching as optional as possible.

E.g. only create the cached mapping during attach if we actually have the
locking problem.
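
I.e. in dma_buf_attach() roughly like this (sketch only; the
need_sgt_cache flag is made up and would have to come from the importer
somehow):

	if (!dma_buf_is_dynamic(dmabuf) && attach->need_sgt_cache) {
		struct sg_table *sgt;

		sgt = dmabuf->ops->map_dma_buf(attach, DMA_BIDIRECTIONAL);
		if (!sgt)
			sgt = ERR_PTR(-ENOMEM);
		if (IS_ERR(sgt)) {
			dma_buf_detach(dmabuf, attach);
			return ERR_CAST(sgt);
		}
		attach->sgt = sgt;
	}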

So back to coding for now,
Christian.



* Re: [PATCH 1/9] dma-buf: start caching of sg_table objects
  2019-05-08 12:23         ` Koenig, Christian
@ 2019-05-08 12:44           ` Daniel Vetter
  0 siblings, 0 replies; 20+ messages in thread
From: Daniel Vetter @ 2019-05-08 12:44 UTC (permalink / raw)
  To: Koenig, Christian; +Cc: dri-devel

On Wed, May 8, 2019 at 2:23 PM Koenig, Christian
<Christian.Koenig@amd.com> wrote:
>
> [snip]
>
> Well no, not really. I think I will just go down the route of making sgt
> caching as optional as possible.
>
> E.g. only create the cached mapping during attach if we actually have the
> locking problem.

Well that's the "review and update everyone, and oh welp is there a
lot of them" approach. Which we tried to avoid ...

> So back to coding for now,

So if you want to go this way, what about resurrecting the
dma_buf_attachment_info rework (I really think that's cleaner than
shitloads of arguments, maybe keep the current dma_buf_attach as-is
and do the info argument thing only for dma_buf_attach_ext() or
similar), and have a ->need_caching flag in there? Then we can set
that for drm_prime and i915 at least. Just an idea to avoid changing
the world ...
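
Something of this shape (just a sketch, bikeshed the names as you like):

	struct dma_buf_attachment_info {
		struct device *dev;
		const struct dma_buf_attach_ops *importer_ops;
		void *importer_priv;
		/* importer can't take the resv lock in its mapping paths */
		bool need_caching;
	};

	struct dma_buf_attachment *
	dma_buf_attach_ext(struct dma_buf *dmabuf,
			   const struct dma_buf_attachment_info *info);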
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* [PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7
       [not found] ` <20190507081512.2619-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2019-05-07  8:15   ` Christian König
  0 siblings, 0 replies; 20+ messages in thread
From: Christian König @ 2019-05-07  8:15 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On the exporter side we add optional explicit pinning callbacks. If those
callbacks are implemented the framework no longer caches sg tables and the
map/unmap callbacks are always called with the lock of the reservation object
held.

On the importer side we add an optional invalidate callback. This callback is
used by the exporter to inform the importers that their mappings should be
destroyed as soon as possible.

This allows the exporter to provide the mappings without the need to pin
the backing store.
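
A minimal importer-side sketch of the intended usage (illustration only,
not part of the patch; my_bo and its field are made up):

	static void my_invalidate(struct dma_buf_attachment *attach)
	{
		struct my_bo *bo = attach->importer_priv;

		/* called with attach->dmabuf->resv held; just mark the sgt stale */
		bo->sgt_stale = true;
	}

	static const struct dma_buf_attach_ops my_attach_ops = {
		.invalidate = my_invalidate,
	};

	/* attach = dma_buf_dynamic_attach(dmabuf, dev, &my_attach_ops, bo); */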

v2: don't try to invalidate mappings when the callback is NULL,
    lock the reservation obj while using the attachments,
    add helper to set the callback
v3: move flag for invalidation support into the DMA-buf,
    use new attach_info structure to set the callback
v4: use importer_priv field instead of mangling exporter priv.
v5: drop invalidation_supported flag
v6: squash together with pin/unpin changes
v7: pin/unpin takes an attachment now

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 190 ++++++++++++++++++++++++++++++++++++--
 include/linux/dma-buf.h   |  91 ++++++++++++++++--
 2 files changed, 261 insertions(+), 20 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 775e13f54083..464a4c38df6c 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -530,10 +530,12 @@ void dma_buf_put(struct dma_buf *dmabuf)
 EXPORT_SYMBOL_GPL(dma_buf_put);
 
 /**
- * dma_buf_attach - Add the device to dma_buf's attachments list; optionally,
+ * dma_buf_dynamic_attach - Add the device to dma_buf's attachments list; optionally,
  * calls attach() of dma_buf_ops to allow device-specific attach functionality
- * @dmabuf:	[in]	buffer to attach device to.
- * @dev:	[in]	device to be attached.
+ * @dmabuf:		[in]	buffer to attach device to.
+ * @dev:		[in]	device to be attached.
+ * @importer_ops:	[in]	importer operations for the attachment
+ * @importer_priv:	[in]	importer private pointer for the attachment
  *
  * Returns struct dma_buf_attachment pointer for this attachment. Attachments
  * must be cleaned up by calling dma_buf_detach().
@@ -547,8 +549,10 @@ EXPORT_SYMBOL_GPL(dma_buf_put);
  * accessible to @dev, and cannot be moved to a more suitable place. This is
  * indicated with the error code -EBUSY.
  */
-struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-					  struct device *dev)
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       const struct dma_buf_attach_ops *importer_ops,
+		       void *importer_priv)
 {
 	struct dma_buf_attachment *attach;
 	int ret;
@@ -562,6 +566,8 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 
 	attach->dev = dev;
 	attach->dmabuf = dmabuf;
+	attach->importer_ops = importer_ops;
+	attach->importer_priv = importer_priv;
 
 	mutex_lock(&dmabuf->lock);
 
@@ -570,7 +576,9 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 		if (ret)
 			goto err_attach;
 	}
+	reservation_object_lock(dmabuf->resv, NULL);
 	list_add(&attach->node, &dmabuf->attachments);
+	reservation_object_unlock(dmabuf->resv);
 
 	mutex_unlock(&dmabuf->lock);
 
@@ -594,6 +602,21 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	mutex_unlock(&dmabuf->lock);
 	return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(dma_buf_dynamic_attach);
+
+/**
+ * dma_buf_attach - Wrapper for dma_buf_dynamic_attach
+ * @dmabuf:	[in]	buffer to attach device to.
+ * @dev:	[in]	device to be attached.
+ *
+ * Wrapper to call dma_buf_dynamic_attach() for drivers which still use a static
+ * mapping.
+ */
+struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+					  struct device *dev)
+{
+	return dma_buf_dynamic_attach(dmabuf, dev, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(dma_buf_attach);
 
 /**
@@ -614,7 +637,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 					   DMA_BIDIRECTIONAL);
 
 	mutex_lock(&dmabuf->lock);
+	reservation_object_lock(dmabuf->resv, NULL);
 	list_del(&attach->node);
+	reservation_object_unlock(dmabuf->resv);
 	if (dmabuf->ops->detach)
 		dmabuf->ops->detach(dmabuf, attach);
 
@@ -623,6 +648,100 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 }
 EXPORT_SYMBOL_GPL(dma_buf_detach);
 
+/**
+ * dma_buf_pin - Lock down the DMA-buf
+ *
+ * @attach:	[in]	attachment which should be pinned
+ *
+ * Returns:
+ * 0 on success, negative error code on failure.
+ */
+int dma_buf_pin(struct dma_buf_attachment *attach)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+	int ret = 0;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	if (dmabuf->ops->pin)
+		ret = dmabuf->ops->pin(attach);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dma_buf_pin);
+
+/**
+ * dma_buf_unpin - Remove lock from DMA-buf
+ *
+ * @attach:	[in]	attachment which should be unpinned
+ */
+void dma_buf_unpin(struct dma_buf_attachment *attach)
+{
+	struct dma_buf *dmabuf = attach->dmabuf;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	if (dmabuf->ops->unpin)
+		dmabuf->ops->unpin(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unpin);
+
+/**
+ * dma_buf_map_attachment_locked - Maps the buffer into _device_ address space
+ * with the reservation lock held. Is a wrapper for map_dma_buf() of the
+ *
+ * Returns the scatterlist table of the attachment;
+ * dma_buf_ops.
+ * @attach:	[in]	attachment whose scatterlist is to be returned
+ * @direction:	[in]	direction of DMA transfer
+ *
+ * Returns sg_table containing the scatterlist to be returned; returns ERR_PTR
+ * on error. May return -EINTR if it is interrupted by a signal.
+ *
+ * A mapping must be unmapped by using dma_buf_unmap_attachment().
+ */
+struct sg_table *
+dma_buf_map_attachment_locked(struct dma_buf_attachment *attach,
+			      enum dma_data_direction direction)
+{
+	struct sg_table *sg_table;
+	int r;
+
+	might_sleep();
+	reservation_object_assert_held(attach->dmabuf->resv);
+
+	if (attach->sgt)
+		return attach->sgt;
+
+	if (attach->importer_ops && attach->importer_ops->invalidate) {
+		/*
+		 * Mapping a DMA-buf can trigger its invalidation, prevent
+		 * sending this event to the caller by temporarily removing
+		 * this attachment from the list.
+		 */
+		list_del(&attach->node);
+	} else {
+		r = dma_buf_pin(attach);
+		if (r)
+			return ERR_PTR(r);
+	}
+
+	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
+	if (attach->importer_ops && attach->importer_ops->invalidate)
+		list_add(&attach->node, &attach->dmabuf->attachments);
+	if (!sg_table)
+		sg_table = ERR_PTR(-ENOMEM);
+
+	if (IS_ERR(sg_table)) {
+		/* the reservation lock is already held here, don't retake it */
+		if (!(attach->importer_ops && attach->importer_ops->invalidate))
+			dma_buf_unpin(attach);
+	}
+
+	return sg_table;
+}
+EXPORT_SYMBOL_GPL(dma_buf_map_attachment_locked);
+
 /**
  * dma_buf_map_attachment - Returns the scatterlist table of the attachment;
  * mapped into _device_ address space. Is a wrapper for map_dma_buf() of the
@@ -651,14 +770,42 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (attach->sgt)
 		return attach->sgt;
 
-	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
-	if (!sg_table)
-		sg_table = ERR_PTR(-ENOMEM);
+	reservation_object_lock(attach->dmabuf->resv, NULL);
+	sg_table = dma_buf_map_attachment_locked(attach, direction);
+	reservation_object_unlock(attach->dmabuf->resv);
 
 	return sg_table;
 }
 EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
 
+/**
+ * dma_buf_unmap_attachment_locked - unmaps the buffer with the reservation
+ * lock held, should deallocate the associated scatterlist. Is a wrapper for
+ * unmap_dma_buf() of dma_buf_ops.
+ * @attach:	[in]	attachment to unmap buffer from
+ * @sg_table:	[in]	scatterlist info of the buffer to unmap
+ * @direction:  [in]    direction of DMA transfer
+ *
+ * This unmaps a DMA mapping for @attach obtained by
+ * dma_buf_map_attachment_locked().
+ */
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *attach,
+				     struct sg_table *sg_table,
+				     enum dma_data_direction direction)
+{
+	might_sleep();
+	reservation_object_assert_held(attach->dmabuf->resv);
+
+	if (attach->sgt == sg_table)
+		return;
+
+	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
+						direction);
+	if (!(attach->importer_ops && attach->importer_ops->invalidate))
+		dma_buf_unpin(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment_locked);
+
 /**
  * dma_buf_unmap_attachment - unmaps and decreases usecount of the buffer;might
  * deallocate the scatterlist associated. Is a wrapper for unmap_dma_buf() of
@@ -681,11 +828,32 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (attach->sgt == sg_table)
 		return;
 
-	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
-						direction);
+	reservation_object_lock(attach->dmabuf->resv, NULL);
+	dma_buf_unmap_attachment_locked(attach, sg_table, direction);
+	reservation_object_unlock(attach->dmabuf->resv);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
+/**
+ * dma_buf_invalidate_mappings - invalidate all mappings of this dma_buf
+ *
+ * @dmabuf:	[in]	buffer which mappings should be invalidated
+ *
+ * Informs all attachments that they need to destroy and recreate all their
+ * mappings.
+ */
+void dma_buf_invalidate_mappings(struct dma_buf *dmabuf)
+{
+	struct dma_buf_attachment *attach;
+
+	reservation_object_assert_held(dmabuf->resv);
+
+	list_for_each_entry(attach, &dmabuf->attachments, node)
+		if (attach->importer_ops && attach->importer_ops->invalidate)
+			attach->importer_ops->invalidate(attach);
+}
+EXPORT_SYMBOL_GPL(dma_buf_invalidate_mappings);
+
 /**
  * DOC: cpu access
  *
@@ -1098,10 +1266,12 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 		seq_puts(s, "\tAttached Devices:\n");
 		attach_count = 0;
 
+		reservation_object_lock(buf_obj->resv, NULL);
 		list_for_each_entry(attach_obj, &buf_obj->attachments, node) {
 			seq_printf(s, "\t%s\n", dev_name(attach_obj->dev));
 			attach_count++;
 		}
+		reservation_object_unlock(buf_obj->resv);
 
 		seq_printf(s, "Total %d devices attached\n\n",
 				attach_count);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 52031fdc75bb..f5681ca15d09 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -90,14 +90,40 @@ struct dma_buf_ops {
 	 */
 	void (*detach)(struct dma_buf *, struct dma_buf_attachment *);
 
+	/**
+	 * @pin:
+	 *
+	 * This is called by dma_buf_pin and lets the exporter know that the
+	 * DMA-buf can't be moved any more.
+	 *
+	 * This is called with the dmabuf->resv object locked.
+	 *
+	 * This callback is optional.
+	 *
+	 * Returns:
+	 *
+	 * 0 on success, negative error code on failure.
+	 */
+	int (*pin)(struct dma_buf_attachment *);
+
+	/**
+	 * @unpin:
+	 *
+	 * This is called by dma_buf_unpin and lets the exporter know that the
+	 * DMA-buf can be moved again.
+	 *
+	 * This is called with the dmabuf->resv object locked.
+	 *
+	 * This callback is optional.
+	 */
+	void (*unpin)(struct dma_buf_attachment *);
+
 	/**
 	 * @map_dma_buf:
 	 *
 	 * This is called by dma_buf_map_attachment() and is used to map a
 	 * shared &dma_buf into device address space, and it is mandatory. It
-	 * can only be called if @attach has been called successfully. This
-	 * essentially pins the DMA buffer into place, and it cannot be moved
-	 * any more
+	 * can only be called if @attach has been called successfully.
 	 *
 	 * This call may sleep, e.g. when the backing storage first needs to be
 	 * allocated, or moved to a location suitable for all currently attached
@@ -118,6 +144,9 @@ struct dma_buf_ops {
 	 * any other kind of sharing that the exporter might wish to make
 	 * available to buffer-users.
 	 *
+	 * This is always called with the dmabuf->resv object locked when
+	 * the pin/unpin callbacks are implemented.
+	 *
 	 * Returns:
 	 *
 	 * A &sg_table scatter list of or the backing storage of the DMA buffer,
@@ -135,9 +164,6 @@ struct dma_buf_ops {
 	 *
 	 * This is called by dma_buf_unmap_attachment() and should unmap and
 	 * release the &sg_table allocated in @map_dma_buf, and it is mandatory.
-	 * It should also unpin the backing storage if this is the last mapping
-	 * of the DMA buffer, it the exporter supports backing storage
-	 * migration.
 	 */
 	void (*unmap_dma_buf)(struct dma_buf_attachment *,
 			      struct sg_table *,
@@ -302,12 +328,43 @@ struct dma_buf {
 	} cb_excl, cb_shared;
 };
 
+/**
+ * struct dma_buf_attach_ops - importer operations for an attachment
+ * @invalidate: [optional] invalidate all mappings made using this attachment.
+ *
+ * Attachment operations implemented by the importer.
+ */
+struct dma_buf_attach_ops {
+	/**
+	 * @invalidate:
+	 *
+	 * If the invalidate callback is provided the framework can avoid
+	 * pinning the backing store while mappings exists.
+	 *
+	 * This callback is called with the lock of the reservation object
+	 * associated with the dma_buf held and the mapping function must be
+	 * called with this lock held as well. This makes sure that no mapping
+	 * is created concurrently with an ongoing invalidation.
+	 *
+	 * After the callback all existing mappings are still valid until all
+	 * fences in the dma_buf's reservation object are signaled. After getting an
+	 * invalidation callback all mappings should be destroyed by the importer using
+	 * the normal dma_buf_unmap_attachment() function as soon as possible.
+	 *
+	 * New mappings can be created immediately, but can't be used before the
+	 * exclusive fence in the dma_buf's reservation object is signaled.
+	 */
+	void (*invalidate)(struct dma_buf_attachment *attach);
+};
+
 /**
  * struct dma_buf_attachment - holds device-buffer attachment data
  * @dmabuf: buffer for this attachment.
  * @dev: device attached to the buffer.
  * @node: list of dma_buf_attachment.
  * @priv: exporter specific attachment data.
+ * @importer_ops: importer operations for this attachment.
+ * @importer_priv: importer specific attachment data.
  *
  * This structure holds the attachment information between the dma_buf buffer
  * and its user device(s). The list contains one attachment struct per device
@@ -324,6 +381,9 @@ struct dma_buf_attachment {
 	struct list_head node;
 	struct sg_table *sgt;
 	void *priv;
+
+	const struct dma_buf_attach_ops *importer_ops;
+	void *importer_priv;
 };
 
 /**
@@ -383,14 +443,19 @@ static inline void get_dma_buf(struct dma_buf *dmabuf)
  */
 static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf)
 {
-	/* Always use a static mapping for now */
-	return false;
+	return !!dmabuf->ops->pin;
 }
 
 struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
-							struct device *dev);
+					  struct device *dev);
+struct dma_buf_attachment *
+dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
+		       const struct dma_buf_attach_ops *importer_ops,
+		       void *importer_priv);
 void dma_buf_detach(struct dma_buf *dmabuf,
-				struct dma_buf_attachment *dmabuf_attach);
+		    struct dma_buf_attachment *attach);
+int dma_buf_pin(struct dma_buf_attachment *attach);
+void dma_buf_unpin(struct dma_buf_attachment *attach);
 
 struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
 
@@ -398,10 +463,16 @@ int dma_buf_fd(struct dma_buf *dmabuf, int flags);
 struct dma_buf *dma_buf_get(int fd);
 void dma_buf_put(struct dma_buf *dmabuf);
 
+struct sg_table *dma_buf_map_attachment_locked(struct dma_buf_attachment *,
+					       enum dma_data_direction);
 struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *,
 					enum dma_data_direction);
+void dma_buf_unmap_attachment_locked(struct dma_buf_attachment *,
+				     struct sg_table *,
+				     enum dma_data_direction);
 void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
 				enum dma_data_direction);
+void dma_buf_invalidate_mappings(struct dma_buf *dma_buf);
 int dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 			     enum dma_data_direction dir);
 int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
-- 
2.17.1


end of thread, other threads:[~2019-05-08 12:44 UTC | newest]

Thread overview: 20+ messages
2019-05-07  8:13 [PATCH 1/9] dma-buf: start caching of sg_table objects Christian König
2019-05-07  8:13 ` [PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7 Christian König
2019-05-08  9:51   ` Daniel Vetter
2019-05-07  8:13 ` [PATCH 3/9] drm: remove prime sg_table caching Christian König
2019-05-08  9:57   ` Daniel Vetter
2019-05-08 11:19     ` Christian König
2019-05-07  8:13 ` [PATCH 4/9] drm/ttm: remove the backing store if no placement is given Christian König
2019-05-07  8:13 ` [PATCH 5/9] drm/ttm: use the parent resv for ghost objects Christian König
2019-05-07  8:13 ` [PATCH 6/9] drm/amdgpu: use allowed_domains for exported DMA-bufs Christian König
2019-05-07  8:13 ` [PATCH 7/9] drm/amdgpu: add independent DMA-buf export v3 Christian König
2019-05-07  8:13 ` [PATCH 8/9] drm/amdgpu: add independent DMA-buf import v5 Christian König
2019-05-07  8:13 ` [PATCH 9/9] drm/amdgpu: add DMA-buf invalidation callback v2 Christian König
2019-05-08  9:15 ` [PATCH 1/9] dma-buf: start caching of sg_table objects Daniel Vetter
2019-05-08 10:11   ` Daniel Vetter
2019-05-08 12:00     ` Christian König
2019-05-08 12:15       ` Daniel Vetter
2019-05-08 12:23         ` Koenig, Christian
2019-05-08 12:44           ` Daniel Vetter
2019-05-08 11:13   ` Christian König
2019-05-07  8:15 Christian König
     [not found] ` <20190507081512.2619-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2019-05-07  8:15   ` [PATCH 2/9] dma-buf: add dynamic DMA-buf handling v7 Christian König
