linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* Re: [RFC PATCH v5 11/15] iommu/io-pgtable-arm: Implement arm_lpae_map_pages()
       [not found] ` <20210408171402.12607-12-isaacm@codeaurora.org>
@ 2021-04-20  5:59   ` chenxiang (M)
  0 siblings, 0 replies; 2+ messages in thread
From: chenxiang (M) @ 2021-04-20  5:59 UTC (permalink / raw)
  To: Isaac J. Manjarres, iommu, linux-arm-kernel; +Cc: robin.murphy, will, pratikp

Hi Isaac,


在 2021/4/9 1:13, Isaac J. Manjarres 写道:
> Implement the map_pages() callback for the ARM LPAE io-pgtable
> format.
>
> Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
> ---
>   drivers/iommu/io-pgtable-arm.c | 42 ++++++++++++++++++++++++++--------
>   1 file changed, 32 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 1b690911995a..92978dd9c885 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -344,20 +344,30 @@ static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table,
>   }
>   
>   static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
> -			  phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
> -			  int lvl, arm_lpae_iopte *ptep, gfp_t gfp)
> +			  phys_addr_t paddr, size_t size, size_t pgcount,
> +			  arm_lpae_iopte prot, int lvl, arm_lpae_iopte *ptep,
> +			  gfp_t gfp, size_t *mapped)
>   {
>   	arm_lpae_iopte *cptep, pte;
>   	size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
>   	size_t tblsz = ARM_LPAE_GRANULE(data);
>   	struct io_pgtable_cfg *cfg = &data->iop.cfg;
> +	int ret = 0, num_entries, max_entries, map_idx_start;
>   
>   	/* Find our entry at the current level */
> -	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> +	map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
> +	ptep += map_idx_start;
>   
>   	/* If we can install a leaf entry at this level, then do so */
> -	if (size == block_size)
> -		return arm_lpae_init_pte(data, iova, paddr, prot, lvl, 1, ptep);
> +	if (size == block_size) {
> +		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
> +		num_entries = min_t(int, pgcount, max_entries);
> +		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
> +		if (!ret && mapped)
> +			*mapped += num_entries * size;
> +
> +		return ret;
> +	}
>   
>   	/* We can't allocate tables at the final level */
>   	if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
> @@ -386,7 +396,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
>   	}
>   
>   	/* Rinse, repeat */
> -	return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep, gfp);
> +	return __arm_lpae_map(data, iova, paddr, size, pgcount, prot, lvl + 1,
> +			      cptep, gfp, mapped);
>   }
>   
>   static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
> @@ -453,8 +464,9 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
>   	return pte;
>   }
>   
> -static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
> -			phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
> +static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
> +			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
> +			      int iommu_prot, gfp_t gfp, size_t *mapped)
>   {
>   	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
>   	struct io_pgtable_cfg *cfg = &data->iop.cfg;
> @@ -463,7 +475,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
>   	arm_lpae_iopte prot;
>   	long iaext = (s64)iova >> cfg->ias;
>   
> -	if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
> +	if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize))
>   		return -EINVAL;
>   
>   	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
> @@ -476,7 +488,8 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
>   		return 0;
>   
>   	prot = arm_lpae_prot_to_pte(data, iommu_prot);
> -	ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, gfp);
> +	ret = __arm_lpae_map(data, iova, paddr, pgsize, pgcount, prot, lvl,
> +			     ptep, gfp, NULL);

The last input parameter should be `mapped` instead of NULL; otherwise *mapped is never updated for callers of arm_lpae_map_pages().

>   	/*
>   	 * Synchronise all PTE updates for the new mapping before there's
>   	 * a chance for anything to kick off a table walk for the new iova.
> @@ -486,6 +499,14 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
>   	return ret;
>   }
>   
> +
> +static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
> +			phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
> +{
> +	return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
> +				  NULL);
> +}
> +
>   static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
>   				    arm_lpae_iopte *ptep)
>   {
> @@ -782,6 +803,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
>   
>   	data->iop.ops = (struct io_pgtable_ops) {
>   		.map		= arm_lpae_map,
> +		.map_pages	= arm_lpae_map_pages,
>   		.unmap		= arm_lpae_unmap,
>   		.unmap_pages	= arm_lpae_unmap_pages,
>   		.iova_to_phys	= arm_lpae_iova_to_phys,



_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC PATCH v5 00/15] Optimizing iommu_[map/unmap] performance
       [not found] <20210408171402.12607-1-isaacm@codeaurora.org>
       [not found] ` <20210408171402.12607-12-isaacm@codeaurora.org>
@ 2021-06-11  3:10 ` Lu Baolu
  1 sibling, 0 replies; 2+ messages in thread
From: Lu Baolu @ 2021-06-11  3:10 UTC (permalink / raw)
  To: Isaac J. Manjarres, iommu, linux-arm-kernel
  Cc: baolu.lu, will, robin.murphy, pratikp

Hi Isaac,

Any update for this series? The iommu core part looks good to me, and I
also have some patches for the Intel IOMMU implementation of [un]map_pages.
I am just wondering when the iommu core could gain this optimization.

Best regards,
baolu

On 4/9/21 1:13 AM, Isaac J. Manjarres wrote:
> When unmapping a buffer from an IOMMU domain, the IOMMU framework unmaps
> the buffer at a granule of the largest page size that is supported by
> the IOMMU hardware and fits within the buffer. For every block that
> is unmapped, the IOMMU framework will call into the IOMMU driver, and
> then the io-pgtable framework to walk the page tables to find the entry
> that corresponds to the IOVA, and then unmaps the entry.
> 
> This can be suboptimal in scenarios where a buffer or a piece of a
> buffer can be split into several contiguous page blocks of the same size.
> For example, consider an IOMMU that supports 4 KB page blocks, 2 MB page
> blocks, and 1 GB page blocks, and a buffer that is 4 MB in size is being
> unmapped at IOVA 0. The current call-flow will result in 4 indirect calls,
> and 2 page table walks, to unmap 2 entries that are next to each other in
> the page-tables, when both entries could have been unmapped in one shot
> by clearing both page table entries in the same call.
> 
> The same optimization is applicable to mapping buffers as well, so
> these patches implement a set of callbacks called unmap_pages and
> map_pages to the io-pgtable code and IOMMU drivers which unmaps or maps
> an IOVA range that consists of a number of pages of the same
> page size that is supported by the IOMMU hardware, and allows for
> manipulating multiple page table entries in the same set of indirect
> calls. The reason for introducing these callbacks is to give other IOMMU
> drivers/io-pgtable formats time to change to using the new callbacks, so
> that the transition to using this approach can be done piecemeal.
> 
> Changes since V4:
> 
> * Fixed type for addr_merge from phys_addr_t to unsigned long so
>    that GENMASK() can be used.
> * Hooked up arm_v7s_[unmap/map]_pages to the io-pgtable ops.
> * Introduced a macro for calculating the number of page table entries
>    for the ARM LPAE io-pgtable format.
> 
> Changes since V3:
> 
> * Removed usage of ULL variants of bitops from Will's patches, as
>    they were not needed.
> * Instead of unmapping/mapping pgcount pages, unmap_pages() and
>    map_pages() will at most unmap and map pgcount pages, allowing
>    for part of the pages in pgcount to be mapped and unmapped. This
>    was done to simplify the handling in the io-pgtable layer.
> * Extended the existing PTE manipulation methods in io-pgtable-arm
>    to handle multiple entries, per Robin's suggestion, eliminating
>    the need to add functions to clear multiple PTEs.
> * Implemented a naive form of [map/unmap]_pages() for ARM v7s io-pgtable
>    format.
> * arm_[v7s/lpae]_[map/unmap] will call
>    arm_[v7s/lpae]_[map_pages/unmap_pages] with an argument of 1 page.
> * The arm_smmu_[map/unmap] functions have been removed, since they
>    have been replaced by arm_smmu_[map/unmap]_pages.
> 
> Changes since V2:
> 
> * Added a check in __iommu_map() to check for the existence
>    of either the map or map_pages callback as per Lu's suggestion.
> 
> Changes since V1:
> 
> * Implemented the map_pages() callbacks
> * Integrated Will's patches into this series which
>    address several concerns about how iommu_pgsize() partitioned a
>    buffer (I made a minor change to the patch which changes
>    iommu_pgsize() to use bitmaps by using the ULL variants of
>    the bitops)
> 
> Isaac J. Manjarres (12):
>    iommu/io-pgtable: Introduce unmap_pages() as a page table op
>    iommu: Add an unmap_pages() op for IOMMU drivers
>    iommu/io-pgtable: Introduce map_pages() as a page table op
>    iommu: Add a map_pages() op for IOMMU drivers
>    iommu: Add support for the map_pages() callback
>    iommu/io-pgtable-arm: Prepare PTE methods for handling multiple
>      entries
>    iommu/io-pgtable-arm: Implement arm_lpae_unmap_pages()
>    iommu/io-pgtable-arm: Implement arm_lpae_map_pages()
>    iommu/io-pgtable-arm-v7s: Implement arm_v7s_unmap_pages()
>    iommu/io-pgtable-arm-v7s: Implement arm_v7s_map_pages()
>    iommu/arm-smmu: Implement the unmap_pages() IOMMU driver callback
>    iommu/arm-smmu: Implement the map_pages() IOMMU driver callback
> 
> Will Deacon (3):
>    iommu: Use bitmap to calculate page size in iommu_pgsize()
>    iommu: Split 'addr_merge' argument to iommu_pgsize() into separate
>      parts
>    iommu: Hook up '->unmap_pages' driver callback
> 
>   drivers/iommu/arm/arm-smmu/arm-smmu.c |  18 +--
>   drivers/iommu/io-pgtable-arm-v7s.c    |  50 ++++++-
>   drivers/iommu/io-pgtable-arm.c        | 189 +++++++++++++++++---------
>   drivers/iommu/iommu.c                 | 130 +++++++++++++-----
>   include/linux/io-pgtable.h            |   8 ++
>   include/linux/iommu.h                 |   9 ++
>   6 files changed, 289 insertions(+), 115 deletions(-)
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-06-11  3:15 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20210408171402.12607-1-isaacm@codeaurora.org>
     [not found] ` <20210408171402.12607-12-isaacm@codeaurora.org>
2021-04-20  5:59   ` [RFC PATCH v5 11/15] iommu/io-pgtable-arm: Implement arm_lpae_map_pages() chenxiang (M)
2021-06-11  3:10 ` [RFC PATCH v5 00/15] Optimizing iommu_[map/unmap] performance Lu Baolu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).