* Preparation for unpinned DMA-buf handling
@ 2019-05-14 14:05 Christian König
  2019-05-14 14:05 ` [PATCH 1/2] dma-buf: start caching of sg_table objects v2 Christian König
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Christian König @ 2019-05-14 14:05 UTC (permalink / raw)
  To: daniel, dri-devel

Hi Daniel,

since we are not moving forward with this, I've separated the change out of the larger patchset.

Any objections to this now? It's basically a 1:1 move of the functionality from DRM to DMA-buf.

Thanks,
Christian.



* [PATCH 1/2] dma-buf: start caching of sg_table objects v2
  2019-05-14 14:05 Preparation for unpinned DMA-buf handling Christian König
@ 2019-05-14 14:05 ` Christian König
  2019-05-15  8:58   ` Daniel Vetter
  2019-05-14 14:05 ` [PATCH 2/2] drm: remove prime sg_table caching Christian König
  2019-05-15  8:54 ` Preparation for unpinned DMA-buf handling Daniel Vetter
  2 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2019-05-14 14:05 UTC (permalink / raw)
  To: daniel, dri-devel

To allow a smooth transition from pinning buffer objects to dynamic
invalidation we first start to cache the sg_table for an attachment.

v2: keep closer to the DRM implementation

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 27 +++++++++++++++++++++++++--
 include/linux/dma-buf.h   | 13 +++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 7c858020d14b..a2d34c6b80a5 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -573,6 +573,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 	list_add(&attach->node, &dmabuf->attachments);
 
 	mutex_unlock(&dmabuf->lock);
+
 	return attach;
 
 err_attach:
@@ -595,6 +596,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 	if (WARN_ON(!dmabuf || !attach))
 		return;
 
+	if (attach->sgt)
+		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
+
 	mutex_lock(&dmabuf->lock);
 	list_del(&attach->node);
 	if (dmabuf->ops->detach)
@@ -630,10 +634,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf))
 		return ERR_PTR(-EINVAL);
 
+	if (attach->sgt) {
+		/*
+		 * Two mappings with different directions for the same
+		 * attachment are not allowed.
+		 */
+		if (attach->dir != direction &&
+		    attach->dir != DMA_BIDIRECTIONAL)
+			return ERR_PTR(-EBUSY);
+
+		return attach->sgt;
+	}
+
 	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
 	if (!sg_table)
 		sg_table = ERR_PTR(-ENOMEM);
 
+	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
+		attach->sgt = sg_table;
+		attach->dir = direction;
+	}
+
 	return sg_table;
 }
 EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
@@ -657,8 +678,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
 	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
 		return;
 
-	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
-						direction);
+	if (attach->sgt == sg_table)
+		return;
+
+	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
 }
 EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
 
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 58725f890b5b..45b9767e67ec 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -51,6 +51,15 @@ struct dma_buf_attachment;
  * @vunmap: [optional] unmaps a vmap from the buffer
  */
 struct dma_buf_ops {
+	/**
+	  * @cache_sgt_mapping:
+	  *
+	  * If true the framework will cache the first mapping made for each
+	  * attachment. This avoids creating mappings for attachments multiple
+	  * times.
+	  */
+	bool cache_sgt_mapping;
+
 	/**
 	 * @attach:
 	 *
@@ -307,6 +316,8 @@ struct dma_buf {
  * @dmabuf: buffer for this attachment.
  * @dev: device attached to the buffer.
  * @node: list of dma_buf_attachment.
+ * @sgt: cached mapping.
+ * @dir: direction of cached mapping.
  * @priv: exporter specific attachment data.
  *
  * This structure holds the attachment information between the dma_buf buffer
@@ -322,6 +333,8 @@ struct dma_buf_attachment {
 	struct dma_buf *dmabuf;
 	struct device *dev;
 	struct list_head node;
+	struct sg_table *sgt;
+	enum dma_data_direction dir;
 	void *priv;
 };
 
-- 
2.17.1
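
For orientation, a minimal importer-side sketch of how the cached mapping
behaves once an exporter opts in via cache_sgt_mapping (not part of the
patch; "dmabuf" and "dev" are placeholders for an existing DMA-buf and the
importing device):

#include <linux/dma-buf.h>
#include <linux/dma-direction.h>

static int example_import(struct dma_buf *dmabuf, struct device *dev)
{
	struct dma_buf_attachment *attach;
	struct sg_table *sgt, *sgt2;

	attach = dma_buf_attach(dmabuf, dev);
	if (IS_ERR(attach))
		return PTR_ERR(attach);

	/* First map: calls the exporter's map_dma_buf and caches the result. */
	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
	if (IS_ERR(sgt)) {
		dma_buf_detach(dmabuf, attach);
		return PTR_ERR(sgt);
	}

	/* Second map with a compatible direction returns the cached table. */
	sgt2 = dma_buf_map_attachment(attach, DMA_TO_DEVICE);
	WARN_ON(sgt2 != sgt);

	/*
	 * Unmapping the cached table is a no-op; the real unmap now happens
	 * in dma_buf_detach().
	 */
	dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
	dma_buf_detach(dmabuf, attach);

	return 0;
}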


* [PATCH 2/2] drm: remove prime sg_table caching
  2019-05-14 14:05 Preparation for unpinned DMA-buf handling Christian König
  2019-05-14 14:05 ` [PATCH 1/2] dma-buf: start caching of sg_table objects v2 Christian König
@ 2019-05-14 14:05 ` Christian König
  2019-05-15  8:54 ` Preparation for unpinned DMA-buf handling Daniel Vetter
  2 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2019-05-14 14:05 UTC (permalink / raw)
  To: daniel, dri-devel

That is now done by the DMA-buf helpers instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/drm_prime.c | 77 ++++++++-----------------------------
 1 file changed, 17 insertions(+), 60 deletions(-)

diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 231e3f6d5f41..a6132ded7ec4 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -86,11 +86,6 @@ struct drm_prime_member {
 	struct rb_node handle_rb;
 };
 
-struct drm_prime_attachment {
-	struct sg_table *sgt;
-	enum dma_data_direction dir;
-};
-
 static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
 				    struct dma_buf *dma_buf, uint32_t handle)
 {
@@ -188,25 +183,16 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri
  * @dma_buf: buffer to attach device to
  * @attach: buffer attachment data
  *
- * Allocates &drm_prime_attachment and calls &drm_driver.gem_prime_pin for
- * device specific attachment. This can be used as the &dma_buf_ops.attach
- * callback.
+ * Calls &drm_driver.gem_prime_pin for device specific handling. This can be
+ * used as the &dma_buf_ops.attach callback.
  *
  * Returns 0 on success, negative error code on failure.
  */
 int drm_gem_map_attach(struct dma_buf *dma_buf,
 		       struct dma_buf_attachment *attach)
 {
-	struct drm_prime_attachment *prime_attach;
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	prime_attach = kzalloc(sizeof(*prime_attach), GFP_KERNEL);
-	if (!prime_attach)
-		return -ENOMEM;
-
-	prime_attach->dir = DMA_NONE;
-	attach->priv = prime_attach;
-
 	return drm_gem_pin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_attach);
@@ -222,26 +208,8 @@ EXPORT_SYMBOL(drm_gem_map_attach);
 void drm_gem_map_detach(struct dma_buf *dma_buf,
 			struct dma_buf_attachment *attach)
 {
-	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	if (prime_attach) {
-		struct sg_table *sgt = prime_attach->sgt;
-
-		if (sgt) {
-			if (prime_attach->dir != DMA_NONE)
-				dma_unmap_sg_attrs(attach->dev, sgt->sgl,
-						   sgt->nents,
-						   prime_attach->dir,
-						   DMA_ATTR_SKIP_CPU_SYNC);
-			sg_free_table(sgt);
-		}
-
-		kfree(sgt);
-		kfree(prime_attach);
-		attach->priv = NULL;
-	}
-
 	drm_gem_unpin(obj);
 }
 EXPORT_SYMBOL(drm_gem_map_detach);
@@ -286,39 +254,22 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
 struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
 				     enum dma_data_direction dir)
 {
-	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct sg_table *sgt;
 
-	if (WARN_ON(dir == DMA_NONE || !prime_attach))
+	if (WARN_ON(dir == DMA_NONE))
 		return ERR_PTR(-EINVAL);
 
-	/* return the cached mapping when possible */
-	if (prime_attach->dir == dir)
-		return prime_attach->sgt;
-
-	/*
-	 * two mappings with different directions for the same attachment are
-	 * not allowed
-	 */
-	if (WARN_ON(prime_attach->dir != DMA_NONE))
-		return ERR_PTR(-EBUSY);
-
 	if (obj->funcs)
 		sgt = obj->funcs->get_sg_table(obj);
 	else
 		sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
 
-	if (!IS_ERR(sgt)) {
-		if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
-				      DMA_ATTR_SKIP_CPU_SYNC)) {
-			sg_free_table(sgt);
-			kfree(sgt);
-			sgt = ERR_PTR(-ENOMEM);
-		} else {
-			prime_attach->sgt = sgt;
-			prime_attach->dir = dir;
-		}
+	if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			      DMA_ATTR_SKIP_CPU_SYNC)) {
+		sg_free_table(sgt);
+		kfree(sgt);
+		sgt = ERR_PTR(-ENOMEM);
 	}
 
 	return sgt;
@@ -331,14 +282,19 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf);
  * @sgt: scatterlist info of the buffer to unmap
  * @dir: direction of DMA transfer
  *
- * Not implemented. The unmap is done at drm_gem_map_detach().  This can be
- * used as the &dma_buf_ops.unmap_dma_buf callback.
+ * This can be used as the &dma_buf_ops.unmap_dma_buf callback.
  */
 void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
 			   struct sg_table *sgt,
 			   enum dma_data_direction dir)
 {
-	/* nothing to be done here */
+	if (!sgt)
+		return;
+
+	dma_unmap_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
+			   DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
 }
 EXPORT_SYMBOL(drm_gem_unmap_dma_buf);
 
@@ -452,6 +408,7 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 EXPORT_SYMBOL(drm_gem_dmabuf_mmap);
 
 static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
+	.cache_sgt_mapping = true,
 	.attach = drm_gem_map_attach,
 	.detach = drm_gem_map_detach,
 	.map_dma_buf = drm_gem_map_dma_buf,
-- 
2.17.1


* Re: Preparation for unpinned DMA-buf handling
  2019-05-14 14:05 Preparation for unpinned DMA-buf handling Christian König
  2019-05-14 14:05 ` [PATCH 1/2] dma-buf: start caching of sg_table objects v2 Christian König
  2019-05-14 14:05 ` [PATCH 2/2] drm: remove prime sg_table caching Christian König
@ 2019-05-15  8:54 ` Daniel Vetter
  2019-05-15 20:07   ` Alex Deucher
  2 siblings, 1 reply; 12+ messages in thread
From: Daniel Vetter @ 2019-05-15  8:54 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Tue, May 14, 2019 at 04:05:13PM +0200, Christian König wrote:
> Hi Daniel,
> 
> since we are not moving forward with this I've separated the change out of the larger patchset.
> 
> Any objections to this now? It's basically a 1:1 move of the functionality from DRM to DMA-buf.

For my big picture understanding, and because I spotted in the latest rocm
release that apparently the fancy interconnect is now supported: I looked
around a bit in upstream and basic xgmi support seems to be there already,
but I haven't seen anything that looks like xgmi buffer sharing yet. Am I
missing something, or am I looking at the wrong thing (xgmi seems to be the
engineering name for that fancy interconnect at least)?

For the big plan I've seen the dynamic dma-buf and p2p stuff; I think it'd
be good if we also make sure xgmi and related things integrate somewhat
neatly. Ofc xgmi requires that we upcast the dma_buf_attachment to
something driver specific (I don't think we should make that generic, but
maybe Jerome has different ideas with HMM). The question I have is how much
of the map/unmap/invalidate and passing sg tables around we can reuse for
this.

If you have patches/pointers, it would be really interesting to read them a
bit.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 1/2] dma-buf: start caching of sg_table objects v2
  2019-05-14 14:05 ` [PATCH 1/2] dma-buf: start caching of sg_table objects v2 Christian König
@ 2019-05-15  8:58   ` Daniel Vetter
  2019-05-15  9:25     ` Christian König
  0 siblings, 1 reply; 12+ messages in thread
From: Daniel Vetter @ 2019-05-15  8:58 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Tue, May 14, 2019 at 04:05:14PM +0200, Christian König wrote:
> To allow a smooth transition from pinning buffer objects to dynamic
> invalidation we first start to cache the sg_table for an attachment.
> 
> v2: keep closer to the DRM implementation
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

i915 doesn't use the prime helpers, so this would be a change for us. Not
sure that matters. Since we can't use this as an easy way out of the
locking problem around reservation_object, I'm not sure how useful it is to
move this ahead, at least until we have a clearer picture of how we plan
to roll out the dynamic dma-buf stuff. Once we have a bit more clarity
there I'm all for merging prep work, but as-is I'd be cautious ... This
stuff is a lot more tricky than it seems at first :-/
-Daniel
> ---
>  drivers/dma-buf/dma-buf.c | 27 +++++++++++++++++++++++++--
>  include/linux/dma-buf.h   | 13 +++++++++++++
>  2 files changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 7c858020d14b..a2d34c6b80a5 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -573,6 +573,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>  	list_add(&attach->node, &dmabuf->attachments);
>  
>  	mutex_unlock(&dmabuf->lock);
> +
>  	return attach;
>  
>  err_attach:
> @@ -595,6 +596,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>  	if (WARN_ON(!dmabuf || !attach))
>  		return;
>  
> +	if (attach->sgt)
> +		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
> +
>  	mutex_lock(&dmabuf->lock);
>  	list_del(&attach->node);
>  	if (dmabuf->ops->detach)
> @@ -630,10 +634,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>  	if (WARN_ON(!attach || !attach->dmabuf))
>  		return ERR_PTR(-EINVAL);
>  
> +	if (attach->sgt) {
> +		/*
> +		 * Two mappings with different directions for the same
> +		 * attachment are not allowed.
> +		 */
> +		if (attach->dir != direction &&
> +		    attach->dir != DMA_BIDIRECTIONAL)
> +			return ERR_PTR(-EBUSY);
> +
> +		return attach->sgt;
> +	}
> +
>  	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
>  	if (!sg_table)
>  		sg_table = ERR_PTR(-ENOMEM);
>  
> +	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
> +		attach->sgt = sg_table;
> +		attach->dir = direction;
> +	}
> +
>  	return sg_table;
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
> @@ -657,8 +678,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>  	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
>  		return;
>  
> -	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> -						direction);
> +	if (attach->sgt == sg_table)
> +		return;
> +
> +	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>  
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index 58725f890b5b..45b9767e67ec 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -51,6 +51,15 @@ struct dma_buf_attachment;
>   * @vunmap: [optional] unmaps a vmap from the buffer
>   */
>  struct dma_buf_ops {
> +	/**
> +	  * @cache_sgt_mapping:
> +	  *
> +	  * If true the framework will cache the first mapping made for each
> +	  * attachment. This avoids creating mappings for attachments multiple
> +	  * times.
> +	  */
> +	bool cache_sgt_mapping;
> +
>  	/**
>  	 * @attach:
>  	 *
> @@ -307,6 +316,8 @@ struct dma_buf {
>   * @dmabuf: buffer for this attachment.
>   * @dev: device attached to the buffer.
>   * @node: list of dma_buf_attachment.
> + * @sgt: cached mapping.
> + * @dir: direction of cached mapping.
>   * @priv: exporter specific attachment data.
>   *
>   * This structure holds the attachment information between the dma_buf buffer
> @@ -322,6 +333,8 @@ struct dma_buf_attachment {
>  	struct dma_buf *dmabuf;
>  	struct device *dev;
>  	struct list_head node;
> +	struct sg_table *sgt;
> +	enum dma_data_direction dir;
>  	void *priv;
>  };
>  
> -- 
> 2.17.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

* Re: [PATCH 1/2] dma-buf: start caching of sg_table objects v2
  2019-05-15  8:58   ` Daniel Vetter
@ 2019-05-15  9:25     ` Christian König
  2019-05-16  6:26       ` Daniel Vetter
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2019-05-15  9:25 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: dri-devel

Am 15.05.19 um 10:58 schrieb Daniel Vetter:
> On Tue, May 14, 2019 at 04:05:14PM +0200, Christian König wrote:
>> To allow a smooth transition from pinning buffer objects to dynamic
>> invalidation we first start to cache the sg_table for an attachment.
>>
>> v2: keep closer to the DRM implementation
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> i915 doesn't use the prime helpers, so this would be a change for us.

Not a problem at all, see below. I've added a cache_sgt_mapping member
to the dma_buf_ops structure. The functionality is only enabled when this
member is true.

So I'm just moving code around in those two patches, with no functional
change at all.

Regards,
Christian.
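
As a rough sketch of what the opt-in looks like from an exporter's side
(illustrative only; the callback names are placeholders, and patch 2 does
exactly this for the DRM prime ops):

static const struct dma_buf_ops my_exporter_dmabuf_ops = {
	.cache_sgt_mapping = true,	/* opt in to sg_table caching */
	.attach		= my_attach,	/* placeholder callbacks below */
	.detach		= my_detach,
	.map_dma_buf	= my_map_dma_buf,
	.unmap_dma_buf	= my_unmap_dma_buf,
	.release	= my_release,
	.mmap		= my_mmap,
};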

>   Not
> sure that matters. Since we can't use this as an easy way out of the
> locking problem around reservation_object I'm not sure how useful it is to
> move this ahead. At least until we have a clearer picture on how we plan
> to roll out the dynamic dma-buf stuff. Once we have a bit more clarity
> there I'm all for merging prep work, but as-is I'd be cautious ... This
> stuff is a lot more tricky than it seems at first :-/
> -Daniel
>> ---
>>   drivers/dma-buf/dma-buf.c | 27 +++++++++++++++++++++++++--
>>   include/linux/dma-buf.h   | 13 +++++++++++++
>>   2 files changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>> index 7c858020d14b..a2d34c6b80a5 100644
>> --- a/drivers/dma-buf/dma-buf.c
>> +++ b/drivers/dma-buf/dma-buf.c
>> @@ -573,6 +573,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
>>   	list_add(&attach->node, &dmabuf->attachments);
>>   
>>   	mutex_unlock(&dmabuf->lock);
>> +
>>   	return attach;
>>   
>>   err_attach:
>> @@ -595,6 +596,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
>>   	if (WARN_ON(!dmabuf || !attach))
>>   		return;
>>   
>> +	if (attach->sgt)
>> +		dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
>> +
>>   	mutex_lock(&dmabuf->lock);
>>   	list_del(&attach->node);
>>   	if (dmabuf->ops->detach)
>> @@ -630,10 +634,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
>>   	if (WARN_ON(!attach || !attach->dmabuf))
>>   		return ERR_PTR(-EINVAL);
>>   
>> +	if (attach->sgt) {
>> +		/*
>> +		 * Two mappings with different directions for the same
>> +		 * attachment are not allowed.
>> +		 */
>> +		if (attach->dir != direction &&
>> +		    attach->dir != DMA_BIDIRECTIONAL)
>> +			return ERR_PTR(-EBUSY);
>> +
>> +		return attach->sgt;
>> +	}
>> +
>>   	sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
>>   	if (!sg_table)
>>   		sg_table = ERR_PTR(-ENOMEM);
>>   
>> +	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
>> +		attach->sgt = sg_table;
>> +		attach->dir = direction;
>> +	}
>> +
>>   	return sg_table;
>>   }
>>   EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
>> @@ -657,8 +678,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
>>   	if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
>>   		return;
>>   
>> -	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
>> -						direction);
>> +	if (attach->sgt == sg_table)
>> +		return;
>> +
>> +	attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
>>   }
>>   EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>>   
>> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
>> index 58725f890b5b..45b9767e67ec 100644
>> --- a/include/linux/dma-buf.h
>> +++ b/include/linux/dma-buf.h
>> @@ -51,6 +51,15 @@ struct dma_buf_attachment;
>>    * @vunmap: [optional] unmaps a vmap from the buffer
>>    */
>>   struct dma_buf_ops {
>> +	/**
>> +	  * @cache_sgt_mapping:
>> +	  *
>> +	  * If true the framework will cache the first mapping made for each
>> +	  * attachment. This avoids creating mappings for attachments multiple
>> +	  * times.
>> +	  */
>> +	bool cache_sgt_mapping;
>> +
>>   	/**
>>   	 * @attach:
>>   	 *
>> @@ -307,6 +316,8 @@ struct dma_buf {
>>    * @dmabuf: buffer for this attachment.
>>    * @dev: device attached to the buffer.
>>    * @node: list of dma_buf_attachment.
>> + * @sgt: cached mapping.
>> + * @dir: direction of cached mapping.
>>    * @priv: exporter specific attachment data.
>>    *
>>    * This structure holds the attachment information between the dma_buf buffer
>> @@ -322,6 +333,8 @@ struct dma_buf_attachment {
>>   	struct dma_buf *dmabuf;
>>   	struct device *dev;
>>   	struct list_head node;
>> +	struct sg_table *sgt;
>> +	enum dma_data_direction dir;
>>   	void *priv;
>>   };
>>   
>> -- 
>> 2.17.1
>>


* Re: Preparation for unpinned DMA-buf handling
  2019-05-15  8:54 ` Preparation for unpinned DMA-buf handling Daniel Vetter
@ 2019-05-15 20:07   ` Alex Deucher
  2019-05-15 20:51     ` Daniel Vetter
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Deucher @ 2019-05-15 20:07 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Christian König, Maling list - DRI developers

On Wed, May 15, 2019 at 4:54 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Tue, May 14, 2019 at 04:05:13PM +0200, Christian König wrote:
> > Hi Daniel,
> >
> > since we are not moving forward with this I've separated the change out of the larger patchset.
> >
> > Any objections to this now? It's basically a 1:1 move of the functionality from DRM to DMA-buf.
>
> For my big picture understanding, and because I spotted in the latest rocm
> release that apparently the fancy interconnect is now support. I looked
> around a bit in upstream and basic xgmi support seems to be there already,
> but I haven't seen anything that looks like xgmi buffer sharing is there
> already? Missing something, or am I looking at the wrong thing (xgmi seems
> to be the engineering name for that fancy interconnect at least).
>
> For the big plan I've seen the dynamic dma-buf and p2p stuff, I think it'd
> be good if we also make sure xgmi and related things integrate somewhat
> neatly. Ofc xgmi requires that we upcast the dma_buf_attachment to
> somethign driver specific (I don't think we should make that generic, but
> maybe Jerome has different ideas with HMM). Question I have is how much of
> the map/unmap/invaliidate and passing sg tables around we can reuse for
> this.

xgmi is an optimization in the driver.  If a dma-buf comes in, we'd
check the dma-buf pointers to verify that it is an amdgpu buffer, then
we'd check whether the other GPU is connected via xgmi or not.  If so, we'd
use the internal xgmi address when setting up our GPUVM address space;
otherwise, we'd use the dma address.

The code to actually enable sharing over xgmi (or p2p for that matter)
is not upstream yet.  It's pretty trivial though once the
prerequisites are in place.
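
Roughly, that decision could be sketched like this (purely illustrative,
not the actual amdgpu code; "my_gpu_dmabuf_ops", "gpu_from_dmabuf()",
"xgmi_connected()" and "xgmi_address()" are made-up placeholders for the
driver-internal checks):

/*
 * Pick the address to program into the GPUVM page tables for an imported
 * buffer: prefer the xgmi address when the exporting GPU is an xgmi peer,
 * otherwise fall back to the dma address from the sg table.
 */
static u64 pick_import_address(struct my_gpu_device *adev,
			       struct dma_buf *buf, struct sg_table *sgt)
{
	if (buf->ops == &my_gpu_dmabuf_ops) {	/* exported by our driver? */
		struct my_gpu_device *peer = gpu_from_dmabuf(buf);

		if (xgmi_connected(adev, peer))
			return xgmi_address(peer, buf);
	}

	return sg_dma_address(sgt->sgl);
}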

Here's the patch to enable xgmi rather than p2p when it's available:
https://cgit.freedesktop.org/~agd5f/linux/commit/?h=amd-19.10&id=7a443319db2e614114b119015daa4e2c1ed9474d

And the original patch to add p2p support is mixed into this giant
squashed commit (I'll see if I can dig up the original patch):
https://cgit.freedesktop.org/~agd5f/linux/commit/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c?h=amd-19.10&id=67f5976ee4842d01af79144a8355a040ed36b6d2
The relevant change is this hunk here:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d4d3424..49dd501 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1674,9 +1677,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
         exclusive = reservation_object_get_excl(bo->tbo.resv);
     }

-    if (bo)
+    if (bo) {
         flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
-    else
+        bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+        if (mem && mem->mem_type == TTM_PL_VRAM &&
+            adev != bo_adev) {
+            flags |= AMDGPU_PTE_SYSTEM;
+            vram_base_offset = bo_adev->gmc.aper_base;
+        }
+    } else
         flags = 0x0;

     if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))

We just treat the remote BAR like we would for system memory mapped
into the GPUVM.  This will obviously need some updating to properly
deal with IOMMU, etc. once dma-buf lands.

Alex

>
> If you have patches/pointers would be really intersting to read them a
> bit.
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

* Re: Preparation for unpinned DMA-buf handling
  2019-05-15 20:07   ` Alex Deucher
@ 2019-05-15 20:51     ` Daniel Vetter
  2019-05-15 21:24       ` Alex Deucher
  0 siblings, 1 reply; 12+ messages in thread
From: Daniel Vetter @ 2019-05-15 20:51 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Christian König, Maling list - DRI developers

On Wed, May 15, 2019 at 10:08 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Wed, May 15, 2019 at 4:54 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Tue, May 14, 2019 at 04:05:13PM +0200, Christian König wrote:
> > > Hi Daniel,
> > >
> > > since we are not moving forward with this I've separated the change out of the larger patchset.
> > >
> > > Any objections to this now? It's basically a 1:1 move of the functionality from DRM to DMA-buf.
> >
> > For my big picture understanding, and because I spotted in the latest rocm
> > release that apparently the fancy interconnect is now support. I looked
> > around a bit in upstream and basic xgmi support seems to be there already,
> > but I haven't seen anything that looks like xgmi buffer sharing is there
> > already? Missing something, or am I looking at the wrong thing (xgmi seems
> > to be the engineering name for that fancy interconnect at least).
> >
> > For the big plan I've seen the dynamic dma-buf and p2p stuff, I think it'd
> > be good if we also make sure xgmi and related things integrate somewhat
> > neatly. Ofc xgmi requires that we upcast the dma_buf_attachment to
> > somethign driver specific (I don't think we should make that generic, but
> > maybe Jerome has different ideas with HMM). Question I have is how much of
> > the map/unmap/invaliidate and passing sg tables around we can reuse for
> > this.
>
> xgmi is an optimization in the driver.  If a dma-buf comes in, we'd
> check the drm-buf pointers to verify that it is an amdgpu buffer, then
> we'd check if the other GPU is connected via xgmi or not.  If so, we'd
> use the internal xgmi address when setting up our GPUVM address space,
> otherwise, we'd use the dma address.
>
> The code to actually enable sharing over xgmi (or p2p for that matter)
> is not upstream yet.  It's pretty trivial though once the
> prerequisites are in place.
>
> Here's the patch to enable xgmi rather than p2p when it's available:
> https://cgit.freedesktop.org/~agd5f/linux/commit/?h=amd-19.10&id=7a443319db2e614114b119015daa4e2c1ed9474d

Hm I think I looked at this, but didn't realize how it all fits. I'll
grab the rocm kernel tree again and browse a bit more.

> And the original patch to add p2p support is mixed into this giant
> squashed commit (I'll see if I can dig up the original patch):
> https://cgit.freedesktop.org/~agd5f/linux/commit/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c?h=amd-19.10&id=67f5976ee4842d01af79144a8355a040ed36b6d2
> The relevant change is this hunk here:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d4d3424..49dd501 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1674,9 +1677,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>          exclusive = reservation_object_get_excl(bo->tbo.resv);
>      }
>
> -    if (bo)
> +    if (bo) {
>          flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
> -    else
> +        bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
> +        if (mem && mem->mem_type == TTM_PL_VRAM &&
> +            adev != bo_adev) {
> +            flags |= AMDGPU_PTE_SYSTEM;
> +            vram_base_offset = bo_adev->gmc.aper_base;
> +        }
> +    } else
>          flags = 0x0;
>
>      if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
>
> We just treat the remote BAR like we would for system memory mapped
> into the GPUVM.  This will obviously need some updating to properly
> deal with IOMMU, etc. once dma-buf lands.

Yeah the base address adjusting to pick the right window looks as
expected. I think how you get at the list of offsets for all the pages
of a bo is the more interesting thing. For p2p or system memory you
get an sg table with dma addresses (which you then need to correct for
where that is in gpu pa and stuff that into the gpu vm ptes ofc). For
the direct access/xgmi case I can see a bunch of options, one of them
would be to forgo the import and just directly upcast to the other
driver's amdgpu_bo and access everything directly. That seems to be
what you're doing here. The other extreme would be to somehow augment the
sg table to be able to handle xgmi addresses, I think.

Not sure where we should land on this spectrum for upstream.
-Daniel

>
> Alex
>
> >
> > If you have patches/pointers would be really intersting to read them a
> > bit.
> > -Daniel
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: Preparation for unpinned DMA-buf handling
  2019-05-15 20:51     ` Daniel Vetter
@ 2019-05-15 21:24       ` Alex Deucher
  2019-05-16  6:18         ` Daniel Vetter
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Deucher @ 2019-05-15 21:24 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Christian König, Maling list - DRI developers

On Wed, May 15, 2019 at 4:52 PM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Wed, May 15, 2019 at 10:08 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > On Wed, May 15, 2019 at 4:54 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > >
> > > On Tue, May 14, 2019 at 04:05:13PM +0200, Christian König wrote:
> > > > Hi Daniel,
> > > >
> > > > since we are not moving forward with this I've separated the change out of the larger patchset.
> > > >
> > > > Any objections to this now? It's basically a 1:1 move of the functionality from DRM to DMA-buf.
> > >
> > > For my big picture understanding, and because I spotted in the latest rocm
> > > release that apparently the fancy interconnect is now support. I looked
> > > around a bit in upstream and basic xgmi support seems to be there already,
> > > but I haven't seen anything that looks like xgmi buffer sharing is there
> > > already? Missing something, or am I looking at the wrong thing (xgmi seems
> > > to be the engineering name for that fancy interconnect at least).
> > >
> > > For the big plan I've seen the dynamic dma-buf and p2p stuff, I think it'd
> > > be good if we also make sure xgmi and related things integrate somewhat
> > > neatly. Ofc xgmi requires that we upcast the dma_buf_attachment to
> > > somethign driver specific (I don't think we should make that generic, but
> > > maybe Jerome has different ideas with HMM). Question I have is how much of
> > > the map/unmap/invaliidate and passing sg tables around we can reuse for
> > > this.
> >
> > xgmi is an optimization in the driver.  If a dma-buf comes in, we'd
> > check the drm-buf pointers to verify that it is an amdgpu buffer, then
> > we'd check if the other GPU is connected via xgmi or not.  If so, we'd
> > use the internal xgmi address when setting up our GPUVM address space,
> > otherwise, we'd use the dma address.
> >
> > The code to actually enable sharing over xgmi (or p2p for that matter)
> > is not upstream yet.  It's pretty trivial though once the
> > prerequisites are in place.
> >
> > Here's the patch to enable xgmi rather than p2p when it's available:
> > https://cgit.freedesktop.org/~agd5f/linux/commit/?h=amd-19.10&id=7a443319db2e614114b119015daa4e2c1ed9474d
>
> Hm I think I looked at this, but didn't realize how it all fits. I'll
> grab the rocm kernel tree again and browse a bit more.
>
> > And the original patch to add p2p support is mixed into this giant
> > squashed commit (I'll see if I can dig up the original patch):
> > https://cgit.freedesktop.org/~agd5f/linux/commit/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c?h=amd-19.10&id=67f5976ee4842d01af79144a8355a040ed36b6d2
> > The relevant change is this hunk here:
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index d4d3424..49dd501 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -1674,9 +1677,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
> >          exclusive = reservation_object_get_excl(bo->tbo.resv);
> >      }
> >
> > -    if (bo)
> > +    if (bo) {
> >          flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
> > -    else
> > +        bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > +        if (mem && mem->mem_type == TTM_PL_VRAM &&
> > +            adev != bo_adev) {
> > +            flags |= AMDGPU_PTE_SYSTEM;
> > +            vram_base_offset = bo_adev->gmc.aper_base;
> > +        }
> > +    } else
> >          flags = 0x0;
> >
> >      if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
> >
> > We just treat the remote BAR like we would for system memory mapped
> > into the GPUVM.  This will obviously need some updating to properly
> > deal with IOMMU, etc. once dma-buf lands.
>
> Yeah the base address adjusting to pick the right window looks as
> expected. I think how you get at the list of offsets for all the pages
> of a bo is the more interesting thing. For p2p or system memory you
> get an sg table with dma addresses (which you then need to correct for
> where that is in gpu pa and stuff that into the gpu vm ptes ofc). For
> the direct access/xgmi case I can see a bunch of options, one of them
> would be to forgo the import and just directly upcast to the other
> driver's amdgpu_bo and access everything directly. That seems to be
> what you're doing here. Other extreme would be to somehow augment the

Right.  For xgmi, each connected gpu can access the entire vram
aperture of every other connected gpu via its local vram aperture, like a
giant vram NUMA array.  Well, assuming all driver instances have given
permission to allow access from their xgmi peers.  At the moment we
always do.

For dma-buf, we can check and upcast based on the dma-buf pointers.
For ROCm, we have a single device node for all GPUs in the system, all
allocations go through that, and we can use that to get back to the
right amdgpu driver instance.

Alex

> sg table to be able to handle xgmi addresses I think.
>
> Not sure where we should land on this spectrum for upstream.
> -Daniel
>
> >
> > Alex
> >
> > >
> > > If you have patches/pointers would be really intersting to read them a
> > > bit.
> > > -Daniel
> > > --
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
>
>
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: Preparation for unpinned DMA-buf handling
  2019-05-15 21:24       ` Alex Deucher
@ 2019-05-16  6:18         ` Daniel Vetter
  0 siblings, 0 replies; 12+ messages in thread
From: Daniel Vetter @ 2019-05-16  6:18 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Christian König, Maling list - DRI developers

On Wed, May 15, 2019 at 11:24 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Wed, May 15, 2019 at 4:52 PM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Wed, May 15, 2019 at 10:08 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> > >
> > > On Wed, May 15, 2019 at 4:54 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> > > >
> > > > On Tue, May 14, 2019 at 04:05:13PM +0200, Christian König wrote:
> > > > > Hi Daniel,
> > > > >
> > > > > since we are not moving forward with this I've separated the change out of the larger patchset.
> > > > >
> > > > > Any objections to this now? It's basically a 1:1 move of the functionality from DRM to DMA-buf.
> > > >
> > > > For my big picture understanding, and because I spotted in the latest rocm
> > > > release that apparently the fancy interconnect is now support. I looked
> > > > around a bit in upstream and basic xgmi support seems to be there already,
> > > > but I haven't seen anything that looks like xgmi buffer sharing is there
> > > > already? Missing something, or am I looking at the wrong thing (xgmi seems
> > > > to be the engineering name for that fancy interconnect at least).
> > > >
> > > > For the big plan I've seen the dynamic dma-buf and p2p stuff, I think it'd
> > > > be good if we also make sure xgmi and related things integrate somewhat
> > > > neatly. Ofc xgmi requires that we upcast the dma_buf_attachment to
> > > > somethign driver specific (I don't think we should make that generic, but
> > > > maybe Jerome has different ideas with HMM). Question I have is how much of
> > > > the map/unmap/invaliidate and passing sg tables around we can reuse for
> > > > this.
> > >
> > > xgmi is an optimization in the driver.  If a dma-buf comes in, we'd
> > > check the drm-buf pointers to verify that it is an amdgpu buffer, then
> > > we'd check if the other GPU is connected via xgmi or not.  If so, we'd
> > > use the internal xgmi address when setting up our GPUVM address space,
> > > otherwise, we'd use the dma address.
> > >
> > > The code to actually enable sharing over xgmi (or p2p for that matter)
> > > is not upstream yet.  It's pretty trivial though once the
> > > prerequisites are in place.
> > >
> > > Here's the patch to enable xgmi rather than p2p when it's available:
> > > https://cgit.freedesktop.org/~agd5f/linux/commit/?h=amd-19.10&id=7a443319db2e614114b119015daa4e2c1ed9474d
> >
> > Hm I think I looked at this, but didn't realize how it all fits. I'll
> > grab the rocm kernel tree again and browse a bit more.
> >
> > > And the original patch to add p2p support is mixed into this giant
> > > squashed commit (I'll see if I can dig up the original patch):
> > > https://cgit.freedesktop.org/~agd5f/linux/commit/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c?h=amd-19.10&id=67f5976ee4842d01af79144a8355a040ed36b6d2
> > > The relevant change is this hunk here:
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > > index d4d3424..49dd501 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > > @@ -1674,9 +1677,15 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
> > >          exclusive = reservation_object_get_excl(bo->tbo.resv);
> > >      }
> > >
> > > -    if (bo)
> > > +    if (bo) {
> > >          flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
> > > -    else
> > > +        bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
> > > +        if (mem && mem->mem_type == TTM_PL_VRAM &&
> > > +            adev != bo_adev) {
> > > +            flags |= AMDGPU_PTE_SYSTEM;
> > > +            vram_base_offset = bo_adev->gmc.aper_base;
> > > +        }
> > > +    } else
> > >          flags = 0x0;
> > >
> > >      if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
> > >
> > > We just treat the remote BAR like we would for system memory mapped
> > > into the GPUVM.  This will obviously need some updating to properly
> > > deal with IOMMU, etc. once dma-buf lands.
> >
> > Yeah the base address adjusting to pick the right window looks as
> > expected. I think how you get at the list of offsets for all the pages
> > of a bo is the more interesting thing. For p2p or system memory you
> > get an sg table with dma addresses (which you then need to correct for
> > where that is in gpu pa and stuff that into the gpu vm ptes ofc). For
> > the direct access/xgmi case I can see a bunch of options, one of them
> > would be to forgo the import and just directly upcast to the other
> > driver's amdgpu_bo and access everything directly. That seems to be
> > what you're doing here. Other extreme would be to somehow augment the
>
> Right.  For xgmi, all connected gpus can access the entire vram
> aperture from all connected gpus via it's local vram aperture like a
> giant vram NUMA array.  Well, assuming all driver instances have given
> permission to allow access from their xgmi peers.  At the moment we
> always do.
>
> For dma-buf, we can check and upcast based on the dma-buf pointers.
> For ROCm, we have a single device node for all GPUs in the system and
> all allocations go through that and we can use that get back to the
> right amdgpu driver instance.

Hm, right, the amdkfd model makes things a bit easier in this regard,
and since you have ttm, locking that goes across device boundaries isn't a
bad idea either.
-Daniel

>
> Alex
>
> > sg table to be able to handle xgmi addresses I think.
> >
> > Not sure where we should land on this spectrum for upstream.
> > -Daniel
> >
> > >
> > > Alex
> > >
> > > >
> > > > If you have patches/pointers would be really intersting to read them a
> > > > bit.
> > > > -Daniel
> > > > --
> > > > Daniel Vetter
> > > > Software Engineer, Intel Corporation
> > > > http://blog.ffwll.ch
> >
> >
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > +41 (0) 79 365 57 48 - http://blog.ffwll.ch



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 1/2] dma-buf: start caching of sg_table objects v2
  2019-05-15  9:25     ` Christian König
@ 2019-05-16  6:26       ` Daniel Vetter
  2019-05-22  8:11         ` Daniel Vetter
  0 siblings, 1 reply; 12+ messages in thread
From: Daniel Vetter @ 2019-05-16  6:26 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Wed, May 15, 2019 at 11:25 AM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> Am 15.05.19 um 10:58 schrieb Daniel Vetter:
> > On Tue, May 14, 2019 at 04:05:14PM +0200, Christian König wrote:
> >> To allow a smooth transition from pinning buffer objects to dynamic
> >> invalidation we first start to cache the sg_table for an attachment.
> >>
> >> v2: keep closer to the DRM implementation
> >>
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > i915 doesn't use the prime helpers, so this would be a change for us.
>
> Not a problem at all, see below. I've added an cache_sgt_mapping member
> into the dma_buf_ops structure. Only when this member is true the
> functionality is enabled.

Hm right I missed that.

> So I'm just moving code around in those two patches and no functional
> change at all.

Still not sure. I'll look at your latest dma-buf series to see how it
all fits together.
-Daniel

>
> Regards,
> Christian.
>
> >   Not
> > sure that matters. Since we can't use this as an easy way out of the
> > locking problem around reservation_object I'm not sure how useful it is to
> > move this ahead. At least until we have a clearer picture on how we plan
> > to roll out the dynamic dma-buf stuff. Once we have a bit more clarity
> > there I'm all for merging prep work, but as-is I'd be cautious ... This
> > stuff is a lot more tricky than it seems at first :-/
> > -Daniel
> >> ---
> >>   drivers/dma-buf/dma-buf.c | 27 +++++++++++++++++++++++++--
> >>   include/linux/dma-buf.h   | 13 +++++++++++++
> >>   2 files changed, 38 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> >> index 7c858020d14b..a2d34c6b80a5 100644
> >> --- a/drivers/dma-buf/dma-buf.c
> >> +++ b/drivers/dma-buf/dma-buf.c
> >> @@ -573,6 +573,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> >>      list_add(&attach->node, &dmabuf->attachments);
> >>
> >>      mutex_unlock(&dmabuf->lock);
> >> +
> >>      return attach;
> >>
> >>   err_attach:
> >> @@ -595,6 +596,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
> >>      if (WARN_ON(!dmabuf || !attach))
> >>              return;
> >>
> >> +    if (attach->sgt)
> >> +            dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
> >> +
> >>      mutex_lock(&dmabuf->lock);
> >>      list_del(&attach->node);
> >>      if (dmabuf->ops->detach)
> >> @@ -630,10 +634,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
> >>      if (WARN_ON(!attach || !attach->dmabuf))
> >>              return ERR_PTR(-EINVAL);
> >>
> >> +    if (attach->sgt) {
> >> +            /*
> >> +             * Two mappings with different directions for the same
> >> +             * attachment are not allowed.
> >> +             */
> >> +            if (attach->dir != direction &&
> >> +                attach->dir != DMA_BIDIRECTIONAL)
> >> +                    return ERR_PTR(-EBUSY);
> >> +
> >> +            return attach->sgt;
> >> +    }
> >> +
> >>      sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> >>      if (!sg_table)
> >>              sg_table = ERR_PTR(-ENOMEM);
> >>
> >> +    if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
> >> +            attach->sgt = sg_table;
> >> +            attach->dir = direction;
> >> +    }
> >> +
> >>      return sg_table;
> >>   }
> >>   EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
> >> @@ -657,8 +678,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
> >>      if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
> >>              return;
> >>
> >> -    attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> >> -                                            direction);
> >> +    if (attach->sgt == sg_table)
> >> +            return;
> >> +
> >> +    attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
> >>   }
> >>   EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
> >>
> >> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> >> index 58725f890b5b..45b9767e67ec 100644
> >> --- a/include/linux/dma-buf.h
> >> +++ b/include/linux/dma-buf.h
> >> @@ -51,6 +51,15 @@ struct dma_buf_attachment;
> >>    * @vunmap: [optional] unmaps a vmap from the buffer
> >>    */
> >>   struct dma_buf_ops {
> >> +    /**
> >> +      * @cache_sgt_mapping:
> >> +      *
> >> +      * If true the framework will cache the first mapping made for each
> >> +      * attachment. This avoids creating mappings for attachments multiple
> >> +      * times.
> >> +      */
> >> +    bool cache_sgt_mapping;
> >> +
> >>      /**
> >>       * @attach:
> >>       *
> >> @@ -307,6 +316,8 @@ struct dma_buf {
> >>    * @dmabuf: buffer for this attachment.
> >>    * @dev: device attached to the buffer.
> >>    * @node: list of dma_buf_attachment.
> >> + * @sgt: cached mapping.
> >> + * @dir: direction of cached mapping.
> >>    * @priv: exporter specific attachment data.
> >>    *
> >>    * This structure holds the attachment information between the dma_buf buffer
> >> @@ -322,6 +333,8 @@ struct dma_buf_attachment {
> >>      struct dma_buf *dmabuf;
> >>      struct device *dev;
> >>      struct list_head node;
> >> +    struct sg_table *sgt;
> >> +    enum dma_data_direction dir;
> >>      void *priv;
> >>   };
> >>
> >> --
> >> 2.17.1
> >>
>


--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [PATCH 1/2] dma-buf: start caching of sg_table objects v2
  2019-05-16  6:26       ` Daniel Vetter
@ 2019-05-22  8:11         ` Daniel Vetter
  0 siblings, 0 replies; 12+ messages in thread
From: Daniel Vetter @ 2019-05-22  8:11 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel

On Thu, May 16, 2019 at 08:26:36AM +0200, Daniel Vetter wrote:
> On Wed, May 15, 2019 at 11:25 AM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
> >
> > Am 15.05.19 um 10:58 schrieb Daniel Vetter:
> > > On Tue, May 14, 2019 at 04:05:14PM +0200, Christian König wrote:
> > >> To allow a smooth transition from pinning buffer objects to dynamic
> > >> invalidation we first start to cache the sg_table for an attachment.
> > >>
> > >> v2: keep closer to the DRM implementation
> > >>
> > >> Signed-off-by: Christian König <christian.koenig@amd.com>
> > > i915 doesn't use the prime helpers, so this would be a change for us.
> >
> > Not a problem at all, see below. I've added an cache_sgt_mapping member
> > into the dma_buf_ops structure. Only when this member is true the
> > functionality is enabled.
> 
> Hm right I missed that.
> 
> > So I'm just moving code around in those two patches and no functional
> > change at all.
> 
> Still not sure. I'll look at your latest dma-buf series to see how it
> all fits together.

Ok, I'm still procrastinating on that one, but I guess either way we need
some caching of one sort or another in dma-buf.c, and this series here at
least looks like it's moving in that direction.

On both patches Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

btw might be good to throw this at intel-gfx-trybot too, just to see
whether anything is amiss. I think we've beaten down all the -rc1
regressions now.
-Daniel


> -Daniel
> 
> >
> > Regards,
> > Christian.
> >
> > >   Not
> > > sure that matters. Since we can't use this as an easy way out of the
> > > locking problem around reservation_object I'm not sure how useful it is to
> > > move this ahead. At least until we have a clearer picture on how we plan
> > > to roll out the dynamic dma-buf stuff. Once we have a bit more clarity
> > > there I'm all for merging prep work, but as-is I'd be cautious ... This
> > > stuff is a lot more tricky than it seems at first :-/
> > > -Daniel
> > >> ---
> > >>   drivers/dma-buf/dma-buf.c | 27 +++++++++++++++++++++++++--
> > >>   include/linux/dma-buf.h   | 13 +++++++++++++
> > >>   2 files changed, 38 insertions(+), 2 deletions(-)
> > >>
> > >> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> > >> index 7c858020d14b..a2d34c6b80a5 100644
> > >> --- a/drivers/dma-buf/dma-buf.c
> > >> +++ b/drivers/dma-buf/dma-buf.c
> > >> @@ -573,6 +573,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
> > >>      list_add(&attach->node, &dmabuf->attachments);
> > >>
> > >>      mutex_unlock(&dmabuf->lock);
> > >> +
> > >>      return attach;
> > >>
> > >>   err_attach:
> > >> @@ -595,6 +596,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
> > >>      if (WARN_ON(!dmabuf || !attach))
> > >>              return;
> > >>
> > >> +    if (attach->sgt)
> > >> +            dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
> > >> +
> > >>      mutex_lock(&dmabuf->lock);
> > >>      list_del(&attach->node);
> > >>      if (dmabuf->ops->detach)
> > >> @@ -630,10 +634,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
> > >>      if (WARN_ON(!attach || !attach->dmabuf))
> > >>              return ERR_PTR(-EINVAL);
> > >>
> > >> +    if (attach->sgt) {
> > >> +            /*
> > >> +             * Two mappings with different directions for the same
> > >> +             * attachment are not allowed.
> > >> +             */
> > >> +            if (attach->dir != direction &&
> > >> +                attach->dir != DMA_BIDIRECTIONAL)
> > >> +                    return ERR_PTR(-EBUSY);
> > >> +
> > >> +            return attach->sgt;
> > >> +    }
> > >> +
> > >>      sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
> > >>      if (!sg_table)
> > >>              sg_table = ERR_PTR(-ENOMEM);
> > >>
> > >> +    if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
> > >> +            attach->sgt = sg_table;
> > >> +            attach->dir = direction;
> > >> +    }
> > >> +
> > >>      return sg_table;
> > >>   }
> > >>   EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
> > >> @@ -657,8 +678,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
> > >>      if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
> > >>              return;
> > >>
> > >> -    attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
> > >> -                                            direction);
> > >> +    if (attach->sgt == sg_table)
> > >> +            return;
> > >> +
> > >> +    attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
> > >>   }
> > >>   EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
> > >>
> > >> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> > >> index 58725f890b5b..45b9767e67ec 100644
> > >> --- a/include/linux/dma-buf.h
> > >> +++ b/include/linux/dma-buf.h
> > >> @@ -51,6 +51,15 @@ struct dma_buf_attachment;
> > >>    * @vunmap: [optional] unmaps a vmap from the buffer
> > >>    */
> > >>   struct dma_buf_ops {
> > >> +    /**
> > >> +      * @cache_sgt_mapping:
> > >> +      *
> > >> +      * If true the framework will cache the first mapping made for each
> > >> +      * attachment. This avoids creating mappings for attachments multiple
> > >> +      * times.
> > >> +      */
> > >> +    bool cache_sgt_mapping;
> > >> +
> > >>      /**
> > >>       * @attach:
> > >>       *
> > >> @@ -307,6 +316,8 @@ struct dma_buf {
> > >>    * @dmabuf: buffer for this attachment.
> > >>    * @dev: device attached to the buffer.
> > >>    * @node: list of dma_buf_attachment.
> > >> + * @sgt: cached mapping.
> > >> + * @dir: direction of cached mapping.
> > >>    * @priv: exporter specific attachment data.
> > >>    *
> > >>    * This structure holds the attachment information between the dma_buf buffer
> > >> @@ -322,6 +333,8 @@ struct dma_buf_attachment {
> > >>      struct dma_buf *dmabuf;
> > >>      struct device *dev;
> > >>      struct list_head node;
> > >> +    struct sg_table *sgt;
> > >> +    enum dma_data_direction dir;
> > >>      void *priv;
> > >>   };
> > >>
> > >> --
> > >> 2.17.1
> > >>
> >
> 
> 
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
