All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Zimmermann <tzimmermann@suse.de>
To: Paul Cercueil <paul@crapouillou.net>,
	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
	Maxime Ripard <mripard@kernel.org>,
	David Airlie <airlied@linux.ie>, Daniel Vetter <daniel@ffwll.ch>
Cc: Christoph Hellwig <hch@infradead.org>,
	list@opendingux.net, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org, linux-mips@vger.kernel.org
Subject: Re: [PATCH v4 1/3] drm: Add support for GEM buffers backed by non-coherent memory
Date: Sat, 15 May 2021 20:49:08 +0200	[thread overview]
Message-ID: <fce9aae9-a9f8-1b90-33fc-16cf64888ff7@suse.de> (raw)
In-Reply-To: <20210515145359.64802-2-paul@crapouillou.net>


[-- Attachment #1.1: Type: text/plain, Size: 5916 bytes --]



Am 15.05.21 um 16:53 schrieb Paul Cercueil:
> Having GEM buffers backed by non-coherent memory is interesting in the
> particular case where it is faster to render to a non-coherent buffer
> then sync the data cache, than to render to a write-combine buffer, and
> (by extension) much faster than using a shadow buffer. This is true for
> instance on some Ingenic SoCs, where even simple blits (e.g. memcpy)
> are about three times faster using this method.
> 
> Add a 'map_noncoherent' flag to the drm_gem_cma_object structure, which
> can be set by the drivers when they create the dumb buffer.
> 
> Since this really only applies to software rendering, disable this flag
> as soon as the CMA objects are exported via PRIME.
> 
> v3: New patch. Now uses a simple 'map_noncoherent' flag to control how
>      the objects are mapped, and use the new dma_mmap_pages function.
> 
> v4: Make sure map_noncoherent is always disabled when creating GEM
>      objects meant to be used with dma-buf.
> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>

Acked-by: Thomas Zimmermann <tzimmermann@suse.de>

> ---
>   drivers/gpu/drm/drm_gem_cma_helper.c | 38 +++++++++++++++++++++-------
>   include/drm/drm_gem_cma_helper.h     |  3 +++
>   2 files changed, 32 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
> index 7942cf05cd93..235c7a63da2b 100644
> --- a/drivers/gpu/drm/drm_gem_cma_helper.c
> +++ b/drivers/gpu/drm/drm_gem_cma_helper.c
> @@ -46,6 +46,7 @@ static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = {
>    * __drm_gem_cma_create - Create a GEM CMA object without allocating memory
>    * @drm: DRM device
>    * @size: size of the object to allocate
> + * @private: true if used for internal purposes
>    *
>    * This function creates and initializes a GEM CMA object of the given size,
>    * but doesn't allocate any memory to back the object.
> @@ -55,11 +56,11 @@ static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = {
>    * error code on failure.
>    */
>   static struct drm_gem_cma_object *
> -__drm_gem_cma_create(struct drm_device *drm, size_t size)
> +__drm_gem_cma_create(struct drm_device *drm, size_t size, bool private)
>   {
>   	struct drm_gem_cma_object *cma_obj;
>   	struct drm_gem_object *gem_obj;
> -	int ret;
> +	int ret = 0;
>   
>   	if (drm->driver->gem_create_object)
>   		gem_obj = drm->driver->gem_create_object(drm, size);
> @@ -73,7 +74,14 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size)
>   
>   	cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
>   
> -	ret = drm_gem_object_init(drm, gem_obj, size);
> +	if (private) {
> +		drm_gem_private_object_init(drm, gem_obj, size);
> +
> +		/* Always use writecombine for dma-buf mappings */
> +		cma_obj->map_noncoherent = false;
> +	} else {
> +		ret = drm_gem_object_init(drm, gem_obj, size);
> +	}
>   	if (ret)
>   		goto error;
>   
> @@ -111,12 +119,19 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
>   
>   	size = round_up(size, PAGE_SIZE);
>   
> -	cma_obj = __drm_gem_cma_create(drm, size);
> +	cma_obj = __drm_gem_cma_create(drm, size, false);
>   	if (IS_ERR(cma_obj))
>   		return cma_obj;
>   
> -	cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
> -				      GFP_KERNEL | __GFP_NOWARN);
> +	if (cma_obj->map_noncoherent) {
> +		cma_obj->vaddr = dma_alloc_noncoherent(drm->dev, size,
> +						       &cma_obj->paddr,
> +						       DMA_TO_DEVICE,
> +						       GFP_KERNEL | __GFP_NOWARN);
> +	} else {
> +		cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
> +					      GFP_KERNEL | __GFP_NOWARN);
> +	}
>   	if (!cma_obj->vaddr) {
>   		drm_dbg(drm, "failed to allocate buffer with size %zu\n",
>   			 size);
> @@ -432,7 +447,7 @@ drm_gem_cma_prime_import_sg_table(struct drm_device 
*dev,
>   		return ERR_PTR(-EINVAL);
>   
>   	/* Create a CMA GEM buffer. */
> -	cma_obj = __drm_gem_cma_create(dev, attach->dmabuf->size);
> +	cma_obj = __drm_gem_cma_create(dev, attach->dmabuf->size, true);
>   	if (IS_ERR(cma_obj))
>   		return ERR_CAST(cma_obj);
>   
> @@ -499,8 +514,13 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>   
>   	cma_obj = to_drm_gem_cma_obj(obj);
>   
> -	ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
> -			  cma_obj->paddr, vma->vm_end - vma->vm_start);
> +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> +	if (!cma_obj->map_noncoherent)
> +		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
> +
> +	ret = dma_mmap_pages(cma_obj->base.dev->dev,
> +			     vma, vma->vm_end - vma->vm_start,
> +			     virt_to_page(cma_obj->vaddr));
>   	if (ret)
>   		drm_gem_vm_close(vma);
>   
> diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
> index 0a9711caa3e8..cd13508acbc1 100644
> --- a/include/drm/drm_gem_cma_helper.h
> +++ b/include/drm/drm_gem_cma_helper.h
> @@ -16,6 +16,7 @@ struct drm_mode_create_dumb;
>    *       more than one entry but they are guaranteed to have contiguous
>    *       DMA addresses.
>    * @vaddr: kernel virtual address of the backing memory
> + * @map_noncoherent: if true, the GEM object is backed by non-coherent 
memory
>    */
>   struct drm_gem_cma_object {
>   	struct drm_gem_object base;
> @@ -24,6 +25,8 @@ struct drm_gem_cma_object {
>   
>   	/* For objects with DMA memory allocated by GEM CMA */
>   	void *vaddr;
> +
> +	bool map_noncoherent;
>   };
>   
>   #define to_drm_gem_cma_obj(gem_obj) \
> 

-- 
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 840 bytes --]

WARNING: multiple messages have this Message-ID (diff)
From: Thomas Zimmermann <tzimmermann@suse.de>
To: Paul Cercueil <paul@crapouillou.net>,
	Maarten Lankhorst <maarten.lankhorst@linux.intel.com>,
	Maxime Ripard <mripard@kernel.org>,
	David Airlie <airlied@linux.ie>, Daniel Vetter <daniel@ffwll.ch>
Cc: Christoph Hellwig <hch@infradead.org>,
	list@opendingux.net, linux-kernel@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linux-mips@vger.kernel.org
Subject: Re: [PATCH v4 1/3] drm: Add support for GEM buffers backed by non-coherent memory
Date: Sat, 15 May 2021 20:49:08 +0200	[thread overview]
Message-ID: <fce9aae9-a9f8-1b90-33fc-16cf64888ff7@suse.de> (raw)
In-Reply-To: <20210515145359.64802-2-paul@crapouillou.net>


[-- Attachment #1.1: Type: text/plain, Size: 5916 bytes --]



Am 15.05.21 um 16:53 schrieb Paul Cercueil:
> Having GEM buffers backed by non-coherent memory is interesting in the
> particular case where it is faster to render to a non-coherent buffer
> then sync the data cache, than to render to a write-combine buffer, and
> (by extension) much faster than using a shadow buffer. This is true for
> instance on some Ingenic SoCs, where even simple blits (e.g. memcpy)
> are about three times faster using this method.
> 
> Add a 'map_noncoherent' flag to the drm_gem_cma_object structure, which
> can be set by the drivers when they create the dumb buffer.
> 
> Since this really only applies to software rendering, disable this flag
> as soon as the CMA objects are exported via PRIME.
> 
> v3: New patch. Now uses a simple 'map_noncoherent' flag to control how
>      the objects are mapped, and use the new dma_mmap_pages function.
> 
> v4: Make sure map_noncoherent is always disabled when creating GEM
>      objects meant to be used with dma-buf.
> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>

Acked-by: Thomas Zimmermann <tzimmermann@suse.de>

> ---
>   drivers/gpu/drm/drm_gem_cma_helper.c | 38 +++++++++++++++++++++-------
>   include/drm/drm_gem_cma_helper.h     |  3 +++
>   2 files changed, 32 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
> index 7942cf05cd93..235c7a63da2b 100644
> --- a/drivers/gpu/drm/drm_gem_cma_helper.c
> +++ b/drivers/gpu/drm/drm_gem_cma_helper.c
> @@ -46,6 +46,7 @@ static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = {
>    * __drm_gem_cma_create - Create a GEM CMA object without allocating memory
>    * @drm: DRM device
>    * @size: size of the object to allocate
> + * @private: true if used for internal purposes
>    *
>    * This function creates and initializes a GEM CMA object of the given size,
>    * but doesn't allocate any memory to back the object.
> @@ -55,11 +56,11 @@ static const struct drm_gem_object_funcs drm_gem_cma_default_funcs = {
>    * error code on failure.
>    */
>   static struct drm_gem_cma_object *
> -__drm_gem_cma_create(struct drm_device *drm, size_t size)
> +__drm_gem_cma_create(struct drm_device *drm, size_t size, bool private)
>   {
>   	struct drm_gem_cma_object *cma_obj;
>   	struct drm_gem_object *gem_obj;
> -	int ret;
> +	int ret = 0;
>   
>   	if (drm->driver->gem_create_object)
>   		gem_obj = drm->driver->gem_create_object(drm, size);
> @@ -73,7 +74,14 @@ __drm_gem_cma_create(struct drm_device *drm, size_t size)
>   
>   	cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
>   
> -	ret = drm_gem_object_init(drm, gem_obj, size);
> +	if (private) {
> +		drm_gem_private_object_init(drm, gem_obj, size);
> +
> +		/* Always use writecombine for dma-buf mappings */
> +		cma_obj->map_noncoherent = false;
> +	} else {
> +		ret = drm_gem_object_init(drm, gem_obj, size);
> +	}
>   	if (ret)
>   		goto error;
>   
> @@ -111,12 +119,19 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
>   
>   	size = round_up(size, PAGE_SIZE);
>   
> -	cma_obj = __drm_gem_cma_create(drm, size);
> +	cma_obj = __drm_gem_cma_create(drm, size, false);
>   	if (IS_ERR(cma_obj))
>   		return cma_obj;
>   
> -	cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
> -				      GFP_KERNEL | __GFP_NOWARN);
> +	if (cma_obj->map_noncoherent) {
> +		cma_obj->vaddr = dma_alloc_noncoherent(drm->dev, size,
> +						       &cma_obj->paddr,
> +						       DMA_TO_DEVICE,
> +						       GFP_KERNEL | __GFP_NOWARN);
> +	} else {
> +		cma_obj->vaddr = dma_alloc_wc(drm->dev, size, &cma_obj->paddr,
> +					      GFP_KERNEL | __GFP_NOWARN);
> +	}
>   	if (!cma_obj->vaddr) {
>   		drm_dbg(drm, "failed to allocate buffer with size %zu\n",
>   			 size);
> @@ -432,7 +447,7 @@ drm_gem_cma_prime_import_sg_table(struct drm_device 
*dev,
>   		return ERR_PTR(-EINVAL);
>   
>   	/* Create a CMA GEM buffer. */
> -	cma_obj = __drm_gem_cma_create(dev, attach->dmabuf->size);
> +	cma_obj = __drm_gem_cma_create(dev, attach->dmabuf->size, true);
>   	if (IS_ERR(cma_obj))
>   		return ERR_CAST(cma_obj);
>   
> @@ -499,8 +514,13 @@ int drm_gem_cma_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>   
>   	cma_obj = to_drm_gem_cma_obj(obj);
>   
> -	ret = dma_mmap_wc(cma_obj->base.dev->dev, vma, cma_obj->vaddr,
> -			  cma_obj->paddr, vma->vm_end - vma->vm_start);
> +	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
> +	if (!cma_obj->map_noncoherent)
> +		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
> +
> +	ret = dma_mmap_pages(cma_obj->base.dev->dev,
> +			     vma, vma->vm_end - vma->vm_start,
> +			     virt_to_page(cma_obj->vaddr));
>   	if (ret)
>   		drm_gem_vm_close(vma);
>   
> diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
> index 0a9711caa3e8..cd13508acbc1 100644
> --- a/include/drm/drm_gem_cma_helper.h
> +++ b/include/drm/drm_gem_cma_helper.h
> @@ -16,6 +16,7 @@ struct drm_mode_create_dumb;
>    *       more than one entry but they are guaranteed to have contiguous
>    *       DMA addresses.
>    * @vaddr: kernel virtual address of the backing memory
> + * @map_noncoherent: if true, the GEM object is backed by non-coherent 
memory
>    */
>   struct drm_gem_cma_object {
>   	struct drm_gem_object base;
> @@ -24,6 +25,8 @@ struct drm_gem_cma_object {
>   
>   	/* For objects with DMA memory allocated by GEM CMA */
>   	void *vaddr;
> +
> +	bool map_noncoherent;
>   };
>   
>   #define to_drm_gem_cma_obj(gem_obj) \
> 

-- 
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Felix Imendörffer


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 840 bytes --]

  reply	other threads:[~2021-05-15 18:49 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-15 14:53 [PATCH v4 0/3] Add option to mmap GEM buffers cached Paul Cercueil
2021-05-15 14:53 ` Paul Cercueil
2021-05-15 14:53 ` [PATCH v4 1/3] drm: Add support for GEM buffers backed by non-coherent memory Paul Cercueil
2021-05-15 14:53   ` Paul Cercueil
2021-05-15 18:49   ` Thomas Zimmermann [this message]
2021-05-15 18:49     ` Thomas Zimmermann
2021-05-15 14:53 ` [PATCH v4 2/3] drm: Add and export function drm_gem_cma_sync_data Paul Cercueil
2021-05-15 14:53   ` Paul Cercueil
2021-05-15 19:06   ` Thomas Zimmermann
2021-05-15 19:06     ` Thomas Zimmermann
2021-05-15 14:53 ` [PATCH v4 3/3] drm/ingenic: Add option to alloc cached GEM buffers Paul Cercueil
2021-05-15 14:53   ` Paul Cercueil
2021-05-15 19:42   ` Thomas Zimmermann
2021-05-15 19:42     ` Thomas Zimmermann
2021-05-15 20:08     ` Paul Cercueil
2021-05-15 20:08       ` Paul Cercueil

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fce9aae9-a9f8-1b90-33fc-16cf64888ff7@suse.de \
    --to=tzimmermann@suse.de \
    --cc=airlied@linux.ie \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=hch@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=list@opendingux.net \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=mripard@kernel.org \
    --cc=paul@crapouillou.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.