From: Paul Cercueil <paul@crapouillou.net>
To: Jonathan Cameron <jic23@kernel.org>
Cc: "Alexandru Ardelean" <ardeleanalex@gmail.com>,
	"Lars-Peter Clausen" <lars@metafoo.de>,
	"Michael Hennerich" <Michael.Hennerich@analog.com>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Christian König" <christian.koenig@amd.com>,
	linux-iio@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-media@vger.kernel.org, dri-devel@lists.freedesktop.org,
	linaro-mm-sig@lists.linaro.org
Subject: Re: [PATCH 11/15] iio: buffer-dma: Boost performance using write-combine cache setting
Date: Thu, 18 Nov 2021 11:45:15 +0000	[thread overview]
Message-ID: <FBNR2R.LJWHFK5HYPTY@crapouillou.net> (raw)
In-Reply-To: <20211115141925.60164-12-paul@crapouillou.net>

Hi,

On Mon, Nov 15 2021 at 14:19:21 +0000, Paul Cercueil 
<paul@crapouillou.net> wrote:
> We can be certain that the input buffers will only be accessed by
> userspace for reading, and output buffers will mostly be accessed by
> userspace for writing.
> 
> Therefore, it makes more sense to use only fully cached input buffers,
> and to use the write-combine cache coherency setting for output buffers.
> 
> This boosts performance, as the data written to the output buffers does
> not have to be sync'd for coherency. It will halve performance if the
> userspace application tries to read from the output buffer, but this
> should never happen.
> 
> Since we don't need to sync the cache when disabling CPU access either
> for input buffers or output buffers, the .end_cpu_access() callback can
> be dropped completely.
> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> ---
>  drivers/iio/buffer/industrialio-buffer-dma.c | 82 +++++++++++++-------
>  1 file changed, 54 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/iio/buffer/industrialio-buffer-dma.c b/drivers/iio/buffer/industrialio-buffer-dma.c
> index 92356ee02f30..fb39054d8c15 100644
> --- a/drivers/iio/buffer/industrialio-buffer-dma.c
> +++ b/drivers/iio/buffer/industrialio-buffer-dma.c
> @@ -229,8 +229,33 @@ static int iio_buffer_dma_buf_mmap(struct dma_buf *dbuf,
>  	if (vma->vm_ops->open)
>  		vma->vm_ops->open(vma);
> 
> -	return dma_mmap_pages(dev, vma, vma->vm_end - vma->vm_start,
> -			      virt_to_page(block->vaddr));
> +	if (block->queue->buffer.direction == IIO_BUFFER_DIRECTION_IN) {
> +		/*
> +		 * With an input buffer, userspace will only read the data and
> +		 * never write. We can mmap the buffer fully cached.
> +		 */
> +		return dma_mmap_pages(dev, vma, vma->vm_end - vma->vm_start,
> +				      virt_to_page(block->vaddr));
> +	} else {
> +		/*
> +		 * With an output buffer, userspace will only write the data
> +		 * and should rarely (if ever) read from it. It is better to
> +		 * use write-combine in this case.
> +		 */
> +		return dma_mmap_wc(dev, vma, block->vaddr, block->phys_addr,
> +				   vma->vm_end - vma->vm_start);
> +	}
> +}
> +
> +static void iio_dma_buffer_free_dmamem(struct iio_dma_buffer_block *block)
> +{
> +	struct device *dev = block->queue->dev;
> +	size_t size = PAGE_ALIGN(block->size);
> +
> +	if (block->queue->buffer.direction == IIO_BUFFER_DIRECTION_IN)
> +		dma_free_coherent(dev, size, block->vaddr, block->phys_addr);
> +	else
> +		dma_free_wc(dev, size, block->vaddr, block->phys_addr);
>  }
> 
>  static void iio_buffer_dma_buf_release(struct dma_buf *dbuf)
> @@ -243,9 +268,7 @@ static void iio_buffer_dma_buf_release(struct dma_buf *dbuf)
> 
>  	mutex_lock(&queue->lock);
> 
> -	dma_free_coherent(queue->dev, PAGE_ALIGN(block->size),
> -			  block->vaddr, block->phys_addr);
> -
> +	iio_dma_buffer_free_dmamem(block);
>  	kfree(block);
> 
>  	queue->num_blocks--;
> @@ -268,19 +291,6 @@ static int iio_buffer_dma_buf_begin_cpu_access(struct dma_buf *dbuf,
>  	return 0;
>  }
> 
> -static int iio_buffer_dma_buf_end_cpu_access(struct dma_buf *dbuf,
> -					     enum dma_data_direction dma_dir)
> -{
> -	struct iio_dma_buffer_block *block = dbuf->priv;
> -	struct device *dev = block->queue->dev;
> -
> -	/* We only need to sync the cache for output buffers */
> -	if (block->queue->buffer.direction == IIO_BUFFER_DIRECTION_OUT)
> -		dma_sync_single_for_device(dev, block->phys_addr, block->size, dma_dir);
> -
> -	return 0;
> -}
> -
>  static const struct dma_buf_ops iio_dma_buffer_dmabuf_ops = {
>  	.attach			= iio_buffer_dma_buf_attach,
>  	.map_dma_buf		= iio_buffer_dma_buf_map,
> @@ -288,9 +298,28 @@ static const struct dma_buf_ops iio_dma_buffer_dmabuf_ops = {
>  	.mmap			= iio_buffer_dma_buf_mmap,
>  	.release		= iio_buffer_dma_buf_release,
>  	.begin_cpu_access	= iio_buffer_dma_buf_begin_cpu_access,
> -	.end_cpu_access		= iio_buffer_dma_buf_end_cpu_access,
>  };
> 
> +static int iio_dma_buffer_alloc_dmamem(struct iio_dma_buffer_block *block)
> +{
> +	struct device *dev = block->queue->dev;
> +	size_t size = PAGE_ALIGN(block->size);
> +
> +	if (block->queue->buffer.direction == IIO_BUFFER_DIRECTION_IN) {
> +		block->vaddr = dma_alloc_coherent(dev, size,
> +						  &block->phys_addr,
> +						  GFP_KERNEL);

I'm so used to dma_alloc_noncoherent() that I didn't even notice that 
it was dma_alloc_coherent() here. The code I added was meant to work 
with non-coherent memory - hence the dma_sync_* operations and the use 
of dma_mmap_pages().

I'll fix that in V2.
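
For reference, a rough sketch of what the allocation helper could look 
like after that change (illustrative only, reusing the field names from 
the patch above; not the actual V2 code):

static int iio_dma_buffer_alloc_dmamem(struct iio_dma_buffer_block *block)
{
	struct device *dev = block->queue->dev;
	size_t size = PAGE_ALIGN(block->size);

	if (block->queue->buffer.direction == IIO_BUFFER_DIRECTION_IN) {
		/*
		 * Non-coherent memory for input buffers: this is what the
		 * explicit dma_sync_single_for_*() calls and the use of
		 * dma_mmap_pages() above expect.
		 */
		block->vaddr = dma_alloc_noncoherent(dev, size,
						     &block->phys_addr,
						     DMA_FROM_DEVICE,
						     GFP_KERNEL);
	} else {
		/* Output buffers keep the write-combine mapping. */
		block->vaddr = dma_alloc_wc(dev, size, &block->phys_addr,
					    GFP_KERNEL);
	}

	if (!block->vaddr)
		return -ENOMEM;

	return 0;
}

The matching free path would then use dma_free_noncoherent() with the 
same DMA direction instead of dma_free_coherent().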

Cheers,
-Paul

> +	} else {
> +		block->vaddr = dma_alloc_wc(dev, size,
> +					    &block->phys_addr,
> +					    GFP_KERNEL);
> +	}
> +	if (!block->vaddr)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
>  static struct iio_dma_buffer_block *iio_dma_buffer_alloc_block(
>  	struct iio_dma_buffer_queue *queue, size_t size, bool fileio)
>  {
> @@ -303,12 +332,12 @@ static struct iio_dma_buffer_block *iio_dma_buffer_alloc_block(
>  	if (!block)
>  		return ERR_PTR(-ENOMEM);
> 
> -	block->vaddr = dma_alloc_coherent(queue->dev, PAGE_ALIGN(size),
> -		&block->phys_addr, GFP_KERNEL);
> -	if (!block->vaddr) {
> -		err = -ENOMEM;
> +	block->size = size;
> +	block->queue = queue;
> +
> +	err = iio_dma_buffer_alloc_dmamem(block);
> +	if (err)
>  		goto err_free_block;
> -	}
> 
>  	einfo.ops = &iio_dma_buffer_dmabuf_ops;
>  	einfo.size = PAGE_ALIGN(size);
> @@ -322,10 +351,8 @@ static struct iio_dma_buffer_block *iio_dma_buffer_alloc_block(
>  	}
> 
>  	block->dmabuf = dmabuf;
> -	block->size = size;
>  	block->bytes_used = size;
>  	block->state = IIO_BLOCK_STATE_DONE;
> -	block->queue = queue;
>  	block->fileio = fileio;
>  	INIT_LIST_HEAD(&block->head);
> 
> @@ -338,8 +365,7 @@ static struct iio_dma_buffer_block *iio_dma_buffer_alloc_block(
>  	return block;
> 
>  err_free_dma:
> -	dma_free_coherent(queue->dev, PAGE_ALIGN(size),
> -			  block->vaddr, block->phys_addr);
> +	iio_dma_buffer_free_dmamem(block);
>  err_free_block:
>  	kfree(block);
>  	return ERR_PTR(err);
> --
> 2.33.0
> 




Thread overview: 59+ messages
2021-11-15 14:19 [PATCH 00/15] iio: buffer-dma: write() and new DMABUF based API Paul Cercueil
2021-11-15 14:19 ` [PATCH 01/15] iio: buffer-dma: Get rid of incoming/outgoing queues Paul Cercueil
2021-11-16  8:23   ` Alexandru Ardelean
2021-11-21 14:05   ` Jonathan Cameron
2021-11-21 16:23   ` Lars-Peter Clausen
2021-11-21 17:52     ` Paul Cercueil
2021-11-21 18:49       ` Lars-Peter Clausen
2021-11-21 20:08         ` Paul Cercueil
2021-11-22 15:08           ` Lars-Peter Clausen
2021-11-22 15:16             ` Paul Cercueil
2021-11-22 15:17               ` Lars-Peter Clausen
2021-11-22 15:27                 ` Paul Cercueil
2021-11-15 14:19 ` [PATCH 02/15] iio: buffer-dma: Remove unused iio_buffer_block struct Paul Cercueil
2021-11-16  8:22   ` Alexandru Ardelean
2021-11-15 14:19 ` [PATCH 03/15] iio: buffer-dma: Use round_down() instead of rounddown() Paul Cercueil
2021-11-16  8:26   ` Alexandru Ardelean
2021-11-21 14:08   ` Jonathan Cameron
2021-11-22 10:00     ` Paul Cercueil
2021-11-27 15:15       ` Jonathan Cameron
2021-11-15 14:19 ` [PATCH 04/15] iio: buffer-dma: Enable buffer write support Paul Cercueil
2021-11-16  8:52   ` Alexandru Ardelean
2021-11-21 14:20   ` Jonathan Cameron
2021-11-21 17:19     ` Paul Cercueil
2021-11-27 15:17       ` Jonathan Cameron
2021-11-15 14:19 ` [PATCH 05/15] iio: buffer-dmaengine: Support specifying buffer direction Paul Cercueil
2021-11-16  8:53   ` Alexandru Ardelean
2021-11-15 14:19 ` [PATCH 06/15] iio: buffer-dmaengine: Enable write support Paul Cercueil
2021-11-16  8:55   ` Alexandru Ardelean
2021-11-15 14:19 ` [PATCH 07/15] iio: core: Add new DMABUF interface infrastructure Paul Cercueil
2021-11-21 14:31   ` Jonathan Cameron
2021-11-15 14:19 ` [PATCH 08/15] iio: buffer-dma: split iio_dma_buffer_fileio_free() function Paul Cercueil
2021-11-16 10:59   ` Alexandru Ardelean
2021-11-21 13:49     ` Jonathan Cameron
2021-11-15 14:19 ` [PATCH 09/15] iio: buffer-dma: Use DMABUFs instead of custom solution Paul Cercueil
2021-11-15 14:19 ` [PATCH 10/15] iio: buffer-dma: Implement new DMABUF based userspace API Paul Cercueil
2021-11-15 14:19 ` [PATCH 11/15] iio: buffer-dma: Boost performance using write-combine cache setting Paul Cercueil
2021-11-18 11:45   ` Paul Cercueil [this message]
2021-11-21 15:00   ` Jonathan Cameron
2021-11-21 17:43     ` Paul Cercueil
2021-11-25 17:29       ` Paul Cercueil
2021-11-27 16:05         ` Jonathan Cameron
2021-11-28 13:25           ` Lars-Peter Clausen
2021-11-27 15:20       ` Jonathan Cameron
2021-11-15 14:22 ` [PATCH 12/15] iio: buffer-dmaengine: Support new DMABUF based userspace API Paul Cercueil
2021-11-15 14:22   ` [PATCH 13/15] iio: core: Add support for cyclic buffers Paul Cercueil
2021-11-16  9:50     ` Alexandru Ardelean
2021-11-15 14:22   ` [PATCH 14/15] iio: buffer-dmaengine: " Paul Cercueil
2021-11-16  9:50     ` Alexandru Ardelean
2021-11-15 14:22   ` [PATCH 15/15] Documentation: iio: Document high-speed DMABUF based API Paul Cercueil
2021-11-21 15:10     ` Jonathan Cameron
2021-11-21 17:46       ` Paul Cercueil
2021-11-15 14:37 ` [PATCH 00/15] iio: buffer-dma: write() and new " Daniel Vetter
2021-11-15 14:57   ` Paul Cercueil
2021-11-16 16:02     ` Daniel Vetter
2021-11-16 16:31       ` Laurent Pinchart
2021-11-17  8:48         ` Christian König
2021-11-17 12:50       ` Paul Cercueil
2021-11-17 13:42         ` Hennerich, Michael
2021-11-21 13:57 ` Jonathan Cameron
