All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thierry Reding <thierry.reding-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: Dmitry Osipenko <digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,
	Joerg Roedel <joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
Cc: linux-tegra-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Jonathan Hunter
	<jonathanh-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
Subject: Re: [PATCH v1 4/4] iommu/tegra: gart: Optimize map/unmap
Date: Fri, 27 Apr 2018 12:02:02 +0200	[thread overview]
Message-ID: <20180427100202.GO30388@ulmo> (raw)
In-Reply-To: <f21a7b6a8f141b87f75687904a76f3728ea639a8.1523304324.git.digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>


[-- Attachment #1.1: Type: text/plain, Size: 5011 bytes --]

On Mon, Apr 09, 2018 at 11:07:22PM +0300, Dmitry Osipenko wrote:
> Currently GART writes one page entry at a time. It would be more optimal to
> aggregate the writes and flush the BUS buffer at the end; this gives map/unmap
> a 10-40% (depending on the size of the mapping) performance boost compared to
> flushing after each entry update.
> 
> Signed-off-by: Dmitry Osipenko <digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
>  drivers/iommu/tegra-gart.c | 63 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
> index 4a0607669d34..9f59f5f17661 100644
> --- a/drivers/iommu/tegra-gart.c
> +++ b/drivers/iommu/tegra-gart.c
> @@ -36,7 +36,7 @@
>  #define GART_APERTURE_SIZE	SZ_32M
>  
>  /* bitmap of the page sizes currently supported */
> -#define GART_IOMMU_PGSIZES	(SZ_4K)
> +#define GART_IOMMU_PGSIZES	GENMASK(24, 12)

That doesn't look right. The GART really only supports 4 KiB pages. You
seem to be "emulating" more page sizes here in order to improve mapping
performance. That seems wrong to me. I'm wondering if this couldn't be
improved by a similar factor by simply moving the flushing into an
implementation of ->iotlb_sync().

That said, it seems like ->iotlb_sync() is only used for unmapping, but
I don't see a reason why iommu_map() wouldn't need to call it as well
after going through several calls to ->map(). It seems to me like a
driver that implements ->iotlb_sync() would want to use it to optimize
for both the mapping and unmapping cases.

Joerg, I've gone over the git log and header files and I see no mention
of why the TLB flush interface isn't used for mapping. Do you recall any
special reasons why the same shouldn't be applied for mapping? Would you
accept any patches doing this?

Thierry

>  
>  #define GART_REG_BASE		0x24
>  #define GART_CONFIG		(0x24 - GART_REG_BASE)
> @@ -269,25 +269,21 @@ static void gart_iommu_domain_free(struct iommu_domain *domain)
>  	kfree(gart_domain);
>  }
>  
> -static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
> -			  phys_addr_t pa, size_t bytes, int prot)
> +static int gart_iommu_map_page(struct gart_device *gart,
> +			       unsigned long iova,
> +			       phys_addr_t pa)
>  {
> -	struct gart_domain *gart_domain = to_gart_domain(domain);
> -	struct gart_device *gart = gart_domain->gart;
>  	unsigned long flags;
>  	unsigned long pfn;
>  	unsigned long pte;
>  
> -	if (!gart_iova_range_valid(gart, iova, bytes))
> -		return -EINVAL;
> -
> -	spin_lock_irqsave(&gart->pte_lock, flags);
>  	pfn = __phys_to_pfn(pa);
>  	if (!pfn_valid(pfn)) {
>  		dev_err(gart->dev, "Invalid page: %pa\n", &pa);
> -		spin_unlock_irqrestore(&gart->pte_lock, flags);
>  		return -EINVAL;
>  	}
> +
> +	spin_lock_irqsave(&gart->pte_lock, flags);
>  	if (gart_debug) {
>  		pte = gart_read_pte(gart, iova);
>  		if (pte & GART_ENTRY_PHYS_ADDR_VALID) {
> @@ -297,8 +293,41 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
>  		}
>  	}
>  	gart_set_pte(gart, iova, GART_PTE(pfn));
> +	spin_unlock_irqrestore(&gart->pte_lock, flags);
> +
> +	return 0;
> +}
> +
> +static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
> +			  phys_addr_t pa, size_t bytes, int prot)
> +{
> +	struct gart_domain *gart_domain = to_gart_domain(domain);
> +	struct gart_device *gart = gart_domain->gart;
> +	size_t mapped;
> +	int ret = -1;
> +
> +	if (!gart_iova_range_valid(gart, iova, bytes))
> +		return -EINVAL;
> +
> +	for (mapped = 0; mapped < bytes; mapped += GART_PAGE_SIZE) {
> +		ret = gart_iommu_map_page(gart, iova + mapped, pa + mapped);
> +		if (ret)
> +			break;
> +	}
> +
>  	FLUSH_GART_REGS(gart);
> +	return ret;
> +}
> +
> +static int gart_iommu_unmap_page(struct gart_device *gart,
> +				 unsigned long iova)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&gart->pte_lock, flags);
> +	gart_set_pte(gart, iova, 0);
>  	spin_unlock_irqrestore(&gart->pte_lock, flags);
> +
>  	return 0;
>  }
>  
> @@ -307,16 +336,20 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
>  {
>  	struct gart_domain *gart_domain = to_gart_domain(domain);
>  	struct gart_device *gart = gart_domain->gart;
> -	unsigned long flags;
> +	size_t unmapped;
> +	int ret;
>  
>  	if (!gart_iova_range_valid(gart, iova, bytes))
>  		return 0;
>  
> -	spin_lock_irqsave(&gart->pte_lock, flags);
> -	gart_set_pte(gart, iova, 0);
> +	for (unmapped = 0; unmapped < bytes; unmapped += GART_PAGE_SIZE) {
> +		ret = gart_iommu_unmap_page(gart, iova + unmapped);
> +		if (ret)
> +			break;
> +	}
> +
>  	FLUSH_GART_REGS(gart);
> -	spin_unlock_irqrestore(&gart->pte_lock, flags);
> -	return bytes;
> +	return unmapped;
>  }
>  
>  static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
> -- 
> 2.16.3
> 

[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



WARNING: multiple messages have this Message-ID (diff)
From: Thierry Reding <thierry.reding@gmail.com>
To: Dmitry Osipenko <digetx@gmail.com>, Joerg Roedel <joro@8bytes.org>
Cc: Jonathan Hunter <jonathanh@nvidia.com>,
	iommu@lists.linux-foundation.org, linux-tegra@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v1 4/4] iommu/tegra: gart: Optimize map/unmap
Date: Fri, 27 Apr 2018 12:02:02 +0200	[thread overview]
Message-ID: <20180427100202.GO30388@ulmo> (raw)
In-Reply-To: <f21a7b6a8f141b87f75687904a76f3728ea639a8.1523304324.git.digetx@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 4981 bytes --]

On Mon, Apr 09, 2018 at 11:07:22PM +0300, Dmitry Osipenko wrote:
> Currently GART writes one page entry at a time. It would be more optimal to
> aggregate the writes and flush the BUS buffer at the end; this gives map/unmap
> a 10-40% (depending on the size of the mapping) performance boost compared to
> flushing after each entry update.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/iommu/tegra-gart.c | 63 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 48 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
> index 4a0607669d34..9f59f5f17661 100644
> --- a/drivers/iommu/tegra-gart.c
> +++ b/drivers/iommu/tegra-gart.c
> @@ -36,7 +36,7 @@
>  #define GART_APERTURE_SIZE	SZ_32M
>  
>  /* bitmap of the page sizes currently supported */
> -#define GART_IOMMU_PGSIZES	(SZ_4K)
> +#define GART_IOMMU_PGSIZES	GENMASK(24, 12)

That doesn't look right. The GART really only supports 4 KiB pages. You
seem to be "emulating" more page sizes here in order to improve mapping
performance. That seems wrong to me. I'm wondering if this couldn't be
improved by a similar factor by simply moving the flushing into an
implementation of ->iotlb_sync().

That said, it seems like ->iotlb_sync() is only used for unmapping, but
I don't see a reason why iommu_map() wouldn't need to call it as well
after going through several calls to ->map(). It seems to me like a
driver that implements ->iotlb_sync() would want to use it to optimize
for both the mapping and unmapping cases.

Joerg, I've gone over the git log and header files and I see no mention
of why the TLB flush interface isn't used for mapping. Do you recall any
special reasons why the same shouldn't be applied for mapping? Would you
accept any patches doing this?

Thierry

>  
>  #define GART_REG_BASE		0x24
>  #define GART_CONFIG		(0x24 - GART_REG_BASE)
> @@ -269,25 +269,21 @@ static void gart_iommu_domain_free(struct iommu_domain *domain)
>  	kfree(gart_domain);
>  }
>  
> -static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
> -			  phys_addr_t pa, size_t bytes, int prot)
> +static int gart_iommu_map_page(struct gart_device *gart,
> +			       unsigned long iova,
> +			       phys_addr_t pa)
>  {
> -	struct gart_domain *gart_domain = to_gart_domain(domain);
> -	struct gart_device *gart = gart_domain->gart;
>  	unsigned long flags;
>  	unsigned long pfn;
>  	unsigned long pte;
>  
> -	if (!gart_iova_range_valid(gart, iova, bytes))
> -		return -EINVAL;
> -
> -	spin_lock_irqsave(&gart->pte_lock, flags);
>  	pfn = __phys_to_pfn(pa);
>  	if (!pfn_valid(pfn)) {
>  		dev_err(gart->dev, "Invalid page: %pa\n", &pa);
> -		spin_unlock_irqrestore(&gart->pte_lock, flags);
>  		return -EINVAL;
>  	}
> +
> +	spin_lock_irqsave(&gart->pte_lock, flags);
>  	if (gart_debug) {
>  		pte = gart_read_pte(gart, iova);
>  		if (pte & GART_ENTRY_PHYS_ADDR_VALID) {
> @@ -297,8 +293,41 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
>  		}
>  	}
>  	gart_set_pte(gart, iova, GART_PTE(pfn));
> +	spin_unlock_irqrestore(&gart->pte_lock, flags);
> +
> +	return 0;
> +}
> +
> +static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
> +			  phys_addr_t pa, size_t bytes, int prot)
> +{
> +	struct gart_domain *gart_domain = to_gart_domain(domain);
> +	struct gart_device *gart = gart_domain->gart;
> +	size_t mapped;
> +	int ret = -1;
> +
> +	if (!gart_iova_range_valid(gart, iova, bytes))
> +		return -EINVAL;
> +
> +	for (mapped = 0; mapped < bytes; mapped += GART_PAGE_SIZE) {
> +		ret = gart_iommu_map_page(gart, iova + mapped, pa + mapped);
> +		if (ret)
> +			break;
> +	}
> +
>  	FLUSH_GART_REGS(gart);
> +	return ret;
> +}
> +
> +static int gart_iommu_unmap_page(struct gart_device *gart,
> +				 unsigned long iova)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&gart->pte_lock, flags);
> +	gart_set_pte(gart, iova, 0);
>  	spin_unlock_irqrestore(&gart->pte_lock, flags);
> +
>  	return 0;
>  }
>  
> @@ -307,16 +336,20 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
>  {
>  	struct gart_domain *gart_domain = to_gart_domain(domain);
>  	struct gart_device *gart = gart_domain->gart;
> -	unsigned long flags;
> +	size_t unmapped;
> +	int ret;
>  
>  	if (!gart_iova_range_valid(gart, iova, bytes))
>  		return 0;
>  
> -	spin_lock_irqsave(&gart->pte_lock, flags);
> -	gart_set_pte(gart, iova, 0);
> +	for (unmapped = 0; unmapped < bytes; unmapped += GART_PAGE_SIZE) {
> +		ret = gart_iommu_unmap_page(gart, iova + unmapped);
> +		if (ret)
> +			break;
> +	}
> +
>  	FLUSH_GART_REGS(gart);
> -	spin_unlock_irqrestore(&gart->pte_lock, flags);
> -	return bytes;
> +	return unmapped;
>  }
>  
>  static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
> -- 
> 2.16.3
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  parent reply	other threads:[~2018-04-27 10:02 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-09 20:07 [PATCH v1 0/4] Tegra GART fixes and improvements Dmitry Osipenko
2018-04-09 20:07 ` Dmitry Osipenko
     [not found] ` <cover.1523304324.git.digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-04-09 20:07   ` [PATCH v1 1/4] iommu/tegra: gart: Add debugging facility Dmitry Osipenko
2018-04-09 20:07     ` Dmitry Osipenko
2018-04-27  9:46     ` Thierry Reding
2018-04-09 20:07   ` [PATCH v1 2/4] iommu/tegra: gart: Fix gart_iommu_unmap() Dmitry Osipenko
2018-04-09 20:07     ` Dmitry Osipenko
     [not found]     ` <dd25a9ff7bad7c92b345c0c0ce2bf235c4c3b6e8.1523304324.git.digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-04-27  9:43       ` Thierry Reding
2018-04-27  9:43         ` Thierry Reding
2018-04-09 20:07   ` [PATCH v1 3/4] iommu/tegra: gart: Constify number of GART pages Dmitry Osipenko
2018-04-09 20:07     ` Dmitry Osipenko
     [not found]     ` <954659be6760130f6ffd5e733db2ad58cbb8e6e4.1523304324.git.digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-04-27  9:49       ` Thierry Reding
2018-04-27  9:49         ` Thierry Reding
2018-04-09 20:07   ` [PATCH v1 4/4] iommu/tegra: gart: Optimize map/unmap Dmitry Osipenko
2018-04-09 20:07     ` Dmitry Osipenko
     [not found]     ` <f21a7b6a8f141b87f75687904a76f3728ea639a8.1523304324.git.digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-04-27 10:02       ` Thierry Reding [this message]
2018-04-27 10:02         ` Thierry Reding
2018-04-27 12:01         ` Dmitry Osipenko
2018-04-27 12:36         ` Robin Murphy
2018-04-27 12:36           ` Robin Murphy
     [not found]           ` <716edf58-38a7-21e5-1668-b866bf392e34-5wv7dgnIgG8@public.gmane.org>
2018-05-06 21:19             ` Dmitry Osipenko
2018-05-06 21:19               ` Dmitry Osipenko
     [not found]               ` <6827bda3-1aa2-da60-a749-8e2dd2e595f3-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-05-07  8:04                 ` Joerg Roedel
2018-05-07  8:04                   ` Joerg Roedel
     [not found]                   ` <20180507080420.GB18595-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2018-05-07 15:51                     ` Dmitry Osipenko
2018-05-07 15:51                       ` Dmitry Osipenko
     [not found]                       ` <a6e26d17-97ac-d02e-7bf4-f009af1a25dc-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-05-07 17:38                         ` Dmitry Osipenko
2018-05-07 17:38                           ` Dmitry Osipenko
2018-05-07  7:59         ` Joerg Roedel
2018-05-07  7:59           ` Joerg Roedel
     [not found]           ` <20180507075920.GA18595-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2018-05-07 15:46             ` Dmitry Osipenko
2018-05-07 15:46               ` Dmitry Osipenko
2018-05-03 12:52   ` [PATCH v1 0/4] Tegra GART fixes and improvements Joerg Roedel
2018-05-03 12:52     ` Joerg Roedel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180427100202.GO30388@ulmo \
    --to=thierry.reding-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=digetx-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=jonathanh-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org \
    --cc=joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-tegra-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.