All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Vetter <daniel@ffwll.ch>
To: Alexandre Courbot <acourbot@nvidia.com>
Cc: nouveau@lists.freedesktop.org, linux-kernel@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linux-tegra@vger.kernel.org,
	Ben Skeggs <bskeggs@redhat.com>
Subject: Re: [Nouveau] [PATCH v4 4/6] drm/nouveau: synchronize BOs when required
Date: Thu, 10 Jul 2014 15:04:49 +0200	[thread overview]
Message-ID: <20140710130449.GG17271@phenom.ffwll.local> (raw)
In-Reply-To: <1404807961-30530-5-git-send-email-acourbot@nvidia.com>

On Tue, Jul 08, 2014 at 05:25:59PM +0900, Alexandre Courbot wrote:
> On architectures for which access to GPU memory is non-coherent,
> caches need to be flushed and invalidated explicitly when BO control
> changes between CPU and GPU.
> 
> This patch adds buffer synchronization functions which invokes the
> correct API (PCI or DMA) to ensure synchronization is effective.
> 
> Based on the TTM DMA cache helper patches by Lucas Stach.
> 
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
>  drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
>  drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
>  3 files changed, 70 insertions(+)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 67e9e8e2e2ec..47e4e8886769 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -402,6 +402,60 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
>  		ttm_bo_kunmap(&nvbo->kmap);
>  }
>  
> +void
> +nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
> +{
> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> +	struct nouveau_device *device = nouveau_dev(drm->dev);
> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> +	int i;
> +
> +	if (!ttm_dma)
> +		return;
> +
> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> +		return;

Is the is_cpu_coherent check really required? On coherent platforms the
sync_for_foo should be a noop. It's the dma api's job to encapsulate this
knowledge so that drivers can be blissfully ignorant. The explicit
is_coherent check makes this a bit leaky. And same comment that underlying
the bus-specifics dma-mapping functions are identical.
-Daniel

> +
> +	if (nv_device_is_pci(device)) {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			pci_dma_sync_single_for_device(device->pdev,
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				PCI_DMA_TODEVICE);
> +	} else {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			dma_sync_single_for_device(nv_device_base(device),
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				DMA_TO_DEVICE);
> +	}
> +}
> +
> +void
> +nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
> +{
> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> +	struct nouveau_device *device = nouveau_dev(drm->dev);
> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> +	int i;
> +
> +	if (!ttm_dma)
> +		return;
> +
> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> +		return;
> +
> +	if (nv_device_is_pci(device)) {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			pci_dma_sync_single_for_cpu(device->pdev,
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				PCI_DMA_FROMDEVICE);
> +	} else {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			dma_sync_single_for_cpu(nv_device_base(device),
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				DMA_FROM_DEVICE);
> +	}
> +}
> +
>  int
>  nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>  		    bool no_wait_gpu)
> @@ -418,6 +472,8 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>  	if (!ttm)
>  		return ret;
>  
> +	nouveau_bo_sync_for_device(nvbo);
> +
>  	device = nouveau_dev(nouveau_bdev(ttm->bdev)->dev);
>  	nv_wr32(device, 0x70004, 0x00000001);
>  	if (!nv_wait(device, 0x070004, 0x00000001, 0x00000000))
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
> index ff17c1f432fc..fa42298d2dca 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
> @@ -81,6 +81,8 @@ void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
>  void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
>  int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
>  			 bool no_wait_gpu);
> +void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo);
> +void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo);
>  
>  struct nouveau_vma *
>  nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index c90c0dc0afe8..08829a720891 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -896,6 +896,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>  	spin_lock(&nvbo->bo.bdev->fence_lock);
>  	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
>  	spin_unlock(&nvbo->bo.bdev->fence_lock);
> +	nouveau_bo_sync_for_cpu(nvbo);
>  	drm_gem_object_unreference_unlocked(gem);
>  	return ret;
>  }
> @@ -904,6 +905,17 @@ int
>  nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv)
>  {
> +	struct drm_nouveau_gem_cpu_fini *req = data;
> +	struct drm_gem_object *gem;
> +	struct nouveau_bo *nvbo;
> +
> +	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
> +	if (!gem)
> +		return -ENOENT;
> +	nvbo = nouveau_gem_object(gem);
> +
> +	nouveau_bo_sync_for_device(nvbo);
> +	drm_gem_object_unreference_unlocked(gem);
>  	return 0;
>  }
>  
> -- 
> 2.0.0
> 
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

WARNING: multiple messages have this Message-ID (diff)
From: Daniel Vetter <daniel@ffwll.ch>
To: Alexandre Courbot <acourbot@nvidia.com>
Cc: Ben Skeggs <bskeggs@redhat.com>, David Airlie <airlied@linux.ie>,
	David Herrmann <dh.herrmann@gmail.com>,
	Lucas Stach <dev@lynxeye.de>,
	Thierry Reding <thierry.reding@gmail.com>,
	Maarten Lankhorst <maarten.lankhorst@canonical.com>,
	nouveau@lists.freedesktop.org, linux-kernel@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linux-tegra@vger.kernel.org
Subject: Re: [Nouveau] [PATCH v4 4/6] drm/nouveau: synchronize BOs when required
Date: Thu, 10 Jul 2014 15:04:49 +0200	[thread overview]
Message-ID: <20140710130449.GG17271@phenom.ffwll.local> (raw)
In-Reply-To: <1404807961-30530-5-git-send-email-acourbot@nvidia.com>

On Tue, Jul 08, 2014 at 05:25:59PM +0900, Alexandre Courbot wrote:
> On architectures for which access to GPU memory is non-coherent,
> caches need to be flushed and invalidated explicitly when BO control
> changes between CPU and GPU.
> 
> This patch adds buffer synchronization functions which invokes the
> correct API (PCI or DMA) to ensure synchronization is effective.
> 
> Based on the TTM DMA cache helper patches by Lucas Stach.
> 
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
> ---
>  drivers/gpu/drm/nouveau/nouveau_bo.c  | 56 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/nouveau/nouveau_bo.h  |  2 ++
>  drivers/gpu/drm/nouveau/nouveau_gem.c | 12 ++++++++
>  3 files changed, 70 insertions(+)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
> index 67e9e8e2e2ec..47e4e8886769 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> @@ -402,6 +402,60 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
>  		ttm_bo_kunmap(&nvbo->kmap);
>  }
>  
> +void
> +nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
> +{
> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> +	struct nouveau_device *device = nouveau_dev(drm->dev);
> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> +	int i;
> +
> +	if (!ttm_dma)
> +		return;
> +
> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> +		return;

Is the is_cpu_coherent check really required? On coherent platforms the
sync_for_foo should be a noop. It's the dma api's job to encapsulate this
knowledge so that drivers can be blissfully ignorant. The explicit
is_coherent check makes this a bit leaky. And same comment that underlying
the bus-specifics dma-mapping functions are identical.
-Daniel

> +
> +	if (nv_device_is_pci(device)) {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			pci_dma_sync_single_for_device(device->pdev,
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				PCI_DMA_TODEVICE);
> +	} else {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			dma_sync_single_for_device(nv_device_base(device),
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				DMA_TO_DEVICE);
> +	}
> +}
> +
> +void
> +nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
> +{
> +	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
> +	struct nouveau_device *device = nouveau_dev(drm->dev);
> +	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
> +	int i;
> +
> +	if (!ttm_dma)
> +		return;
> +
> +	if (nv_device_is_cpu_coherent(device) || nvbo->force_coherent)
> +		return;
> +
> +	if (nv_device_is_pci(device)) {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			pci_dma_sync_single_for_cpu(device->pdev,
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				PCI_DMA_FROMDEVICE);
> +	} else {
> +		for (i = 0; i < ttm_dma->ttm.num_pages; i++)
> +			dma_sync_single_for_cpu(nv_device_base(device),
> +				ttm_dma->dma_address[i], PAGE_SIZE,
> +				DMA_FROM_DEVICE);
> +	}
> +}
> +
>  int
>  nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>  		    bool no_wait_gpu)
> @@ -418,6 +472,8 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
>  	if (!ttm)
>  		return ret;
>  
> +	nouveau_bo_sync_for_device(nvbo);
> +
>  	device = nouveau_dev(nouveau_bdev(ttm->bdev)->dev);
>  	nv_wr32(device, 0x70004, 0x00000001);
>  	if (!nv_wait(device, 0x070004, 0x00000001, 0x00000000))
> diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
> index ff17c1f432fc..fa42298d2dca 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_bo.h
> +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
> @@ -81,6 +81,8 @@ void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
>  void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
>  int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
>  			 bool no_wait_gpu);
> +void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo);
> +void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo);
>  
>  struct nouveau_vma *
>  nouveau_bo_vma_find(struct nouveau_bo *, struct nouveau_vm *);
> diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
> index c90c0dc0afe8..08829a720891 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_gem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
> @@ -896,6 +896,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
>  	spin_lock(&nvbo->bo.bdev->fence_lock);
>  	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
>  	spin_unlock(&nvbo->bo.bdev->fence_lock);
> +	nouveau_bo_sync_for_cpu(nvbo);
>  	drm_gem_object_unreference_unlocked(gem);
>  	return ret;
>  }
> @@ -904,6 +905,17 @@ int
>  nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv)
>  {
> +	struct drm_nouveau_gem_cpu_fini *req = data;
> +	struct drm_gem_object *gem;
> +	struct nouveau_bo *nvbo;
> +
> +	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
> +	if (!gem)
> +		return -ENOENT;
> +	nvbo = nouveau_gem_object(gem);
> +
> +	nouveau_bo_sync_for_device(nvbo);
> +	drm_gem_object_unreference_unlocked(gem);
>  	return 0;
>  }
>  
> -- 
> 2.0.0
> 
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

  reply	other threads:[~2014-07-10 13:04 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-08  8:25 [PATCH v4 0/6] drm: nouveau: memory coherency on ARM Alexandre Courbot
2014-07-08  8:25 ` Alexandre Courbot
     [not found] ` <1404807961-30530-1-git-send-email-acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
2014-07-08  8:25   ` [PATCH v4 1/6] drm/ttm: expose CPU address of DMA-allocated pages Alexandre Courbot
2014-07-08  8:25     ` Alexandre Courbot
2014-07-08  8:25   ` [PATCH v4 2/6] drm/nouveau: map pages using DMA API on platform devices Alexandre Courbot
2014-07-08  8:25     ` Alexandre Courbot
     [not found]     ` <1404807961-30530-3-git-send-email-acourbot-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
2014-07-10 12:58       ` Daniel Vetter
2014-07-10 12:58         ` [Nouveau] " Daniel Vetter
     [not found]         ` <20140710125849.GF17271-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2014-07-11  2:35           ` Alexandre Courbot
2014-07-11  2:35             ` Alexandre Courbot
     [not found]             ` <53BF4D6B.70904-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
2014-07-11  2:50               ` Ben Skeggs
2014-07-11  2:50                 ` [Nouveau] " Ben Skeggs
     [not found]                 ` <CACAvsv7eER4VmbR81Ym=YE7fQZ9cNuJsb5372SAuSX+PQfYyrQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-07-11  2:57                   ` Alexandre Courbot
2014-07-11  2:57                     ` Alexandre Courbot
2014-07-11  9:53                     ` Lucas Stach
2014-07-11  9:53                       ` Lucas Stach
2014-07-11  7:38             ` Daniel Vetter
2014-07-11  7:38               ` Daniel Vetter
2014-07-08  8:25   ` [PATCH v4 3/6] drm/nouveau: introduce nv_device_is_cpu_coherent() Alexandre Courbot
2014-07-08  8:25     ` Alexandre Courbot
2014-07-08  8:25   ` [PATCH v4 4/6] drm/nouveau: synchronize BOs when required Alexandre Courbot
2014-07-08  8:25     ` Alexandre Courbot
2014-07-10 13:04     ` Daniel Vetter [this message]
2014-07-10 13:04       ` [Nouveau] " Daniel Vetter
     [not found]       ` <20140710130449.GG17271-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2014-07-11  2:40         ` Alexandre Courbot
2014-07-11  2:40           ` Alexandre Courbot
     [not found]           ` <53BF4E9B.7090606-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
2014-07-11  7:41             ` Daniel Vetter
2014-07-11  7:41               ` [Nouveau] " Daniel Vetter
     [not found]               ` <20140711074138.GW17271-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2014-07-11  9:35                 ` Alexandre Courbot
2014-07-11  9:35                   ` [Nouveau] " Alexandre Courbot
2014-07-08  8:26   ` [PATCH v4 5/6] drm/nouveau: implement explicitly coherent BOs Alexandre Courbot
2014-07-08  8:26     ` Alexandre Courbot
2014-07-08  8:26 ` [PATCH v4 6/6] drm/nouveau: allocate GPFIFOs and fences coherently Alexandre Courbot
2014-07-08  8:26   ` Alexandre Courbot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140710130449.GG17271@phenom.ffwll.local \
    --to=daniel@ffwll.ch \
    --cc=acourbot@nvidia.com \
    --cc=bskeggs@redhat.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-tegra@vger.kernel.org \
    --cc=nouveau@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.