* [PATCH 1/5] iommu/io-pgtable: Introduce map_sg() as a page table op
2021-01-09 1:50 [PATCH 0/5] Optimize iommu_map_sg() performance Isaac J. Manjarres
@ 2021-01-09 1:50 ` Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 2/5] iommu/io-pgtable-arm: Hook up map_sg() Isaac J. Manjarres
` (4 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Isaac J. Manjarres @ 2021-01-09 1:50 UTC (permalink / raw)
To: will, robin.murphy, joro
Cc: Isaac J. Manjarres, pdaly, linux-kernel, iommu, pratikp,
linux-arm-kernel
While mapping a scatter-gather list, iommu_map_sg() calls
into the IOMMU driver through an indirect call, which can
call into the io-pgtable code through another indirect call.
This sequence of going through the IOMMU core code, the IOMMU
driver, and finally the io-pgtable code, occurs for every
element in the scatter-gather list, in the worse case, which
is not optimal.
Introduce a map_sg callback in the io-pgtable ops so that
IOMMU drivers can invoke it with the complete scatter-gather
list, so that it can be processed within the io-pgtable
code entirely, reducing the number of indirect calls, and
boosting overall iommu_map_sg() performance.
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
---
include/linux/io-pgtable.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index ea727eb..6d0e731 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -147,6 +147,9 @@ struct io_pgtable_cfg {
* struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
*
* @map: Map a physically contiguous memory region.
+ * @map_sg: Map a scatter-gather list of physically contiguous memory
+ * chunks. The mapped pointer argument is used to store how
+ * many bytes are mapped.
* @unmap: Unmap a physically contiguous memory region.
* @iova_to_phys: Translate iova to physical address.
*
@@ -156,6 +159,9 @@ struct io_pgtable_cfg {
struct io_pgtable_ops {
int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+ int (*map_sg)(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/5] iommu/io-pgtable-arm: Hook up map_sg()
2021-01-09 1:50 [PATCH 0/5] Optimize iommu_map_sg() performance Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 1/5] iommu/io-pgtable: Introduce map_sg() as a page table op Isaac J. Manjarres
@ 2021-01-09 1:50 ` Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 3/5] iommu/io-pgtable-arm-v7s: " Isaac J. Manjarres
` (3 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Isaac J. Manjarres @ 2021-01-09 1:50 UTC (permalink / raw)
To: will, robin.murphy, joro
Cc: Isaac J. Manjarres, pdaly, linux-kernel, iommu, pratikp,
linux-arm-kernel
Implement the map_sg io-pgtable op for the ARM LPAE io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
---
drivers/iommu/io-pgtable-arm.c | 86 ++++++++++++++++++++++++++++++++++++++++++
drivers/iommu/iommu.c | 12 +++---
include/linux/iommu.h | 8 ++++
3 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 87def58..9c17d9d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -473,6 +473,91 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
}
+static int arm_lpae_map_by_pgsize(struct io_pgtable_ops *ops,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int iommu_prot, gfp_t gfp,
+ size_t *mapped)
+{
+ struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
+ arm_lpae_iopte *ptep = data->pgd;
+ int ret, lvl = data->start_level;
+ arm_lpae_iopte prot = arm_lpae_prot_to_pte(data, iommu_prot);
+ unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+ long iaext = (s64)(iova + size) >> cfg->ias;
+ size_t pgsize;
+
+ if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+ pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
+ iova, &paddr, size, min_pagesz);
+ return -EINVAL;
+ }
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+ iaext = ~iaext;
+ if (WARN_ON(iaext || (paddr + size) >> cfg->oas))
+ return -ERANGE;
+
+ while (size) {
+ pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+ ret = __arm_lpae_map(data, iova, paddr, pgsize, prot, lvl, ptep,
+ gfp);
+ if (ret)
+ return ret;
+
+ iova += pgsize;
+ paddr += pgsize;
+ *mapped += pgsize;
+ size -= pgsize;
+ }
+
+ return 0;
+}
+
+static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+
+ size_t len = 0;
+ unsigned int i = 0;
+ int ret;
+ phys_addr_t start;
+
+ *mapped = 0;
+
+ /* If no access, then nothing to do */
+ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+ return 0;
+
+ while (i <= nents) {
+ phys_addr_t s_phys = sg_phys(sg);
+
+ if (len && s_phys != start + len) {
+ ret = arm_lpae_map_by_pgsize(ops, iova + *mapped, start,
+ len, iommu_prot, gfp,
+ mapped);
+
+ if (ret)
+ return ret;
+
+ len = 0;
+ }
+
+ if (len) {
+ len += sg->length;
+ } else {
+ len = sg->length;
+ start = s_phys;
+ }
+
+ if (++i < nents)
+ sg = sg_next(sg);
+ }
+
+ return 0;
+}
+
static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
{
@@ -750,6 +835,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
data->iop.ops = (struct io_pgtable_ops) {
.map = arm_lpae_map,
+ .map_sg = arm_lpae_map_sg,
.unmap = arm_lpae_unmap,
.iova_to_phys = arm_lpae_iova_to_phys,
};
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ffeebda..0da0687 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2346,8 +2346,8 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
-static size_t iommu_pgsize(struct iommu_domain *domain,
- unsigned long addr_merge, size_t size)
+size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long addr_merge,
+ size_t size)
{
unsigned int pgsize_idx;
size_t pgsize;
@@ -2366,7 +2366,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
pgsize = (1UL << (pgsize_idx + 1)) - 1;
/* throw away page sizes not supported by the hardware */
- pgsize &= domain->pgsize_bitmap;
+ pgsize &= pgsize_bitmap;
/* make sure we're still sane */
BUG_ON(!pgsize);
@@ -2412,7 +2412,8 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
while (size) {
- size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
+ size_t pgsize = iommu_pgsize(domain->pgsize_bitmap,
+ iova | paddr, size);
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
@@ -2490,7 +2491,8 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
- size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
+ size_t pgsize = iommu_pgsize(domain->pgsize_bitmap, iova,
+ size - unmapped);
unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
if (!unmapped_page)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b3f0e20..0e40a38 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -438,6 +438,8 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
+extern size_t iommu_pgsize(unsigned long pgsize_bitmap, unsigned long addr_mer,
+ size_t size);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova,
@@ -690,6 +692,12 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
return NULL;
}
+static inline size_t iommu_pgsize(unsigned long pgsize_bitmap,
+ unsigned long addr_merge, size_t size)
+{
+ return 0;
+}
+
static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 3/5] iommu/io-pgtable-arm-v7s: Hook up map_sg()
2021-01-09 1:50 [PATCH 0/5] Optimize iommu_map_sg() performance Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 1/5] iommu/io-pgtable: Introduce map_sg() as a page table op Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 2/5] iommu/io-pgtable-arm: Hook up map_sg() Isaac J. Manjarres
@ 2021-01-09 1:50 ` Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 4/5] iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers Isaac J. Manjarres
` (2 subsequent siblings)
5 siblings, 0 replies; 9+ messages in thread
From: Isaac J. Manjarres @ 2021-01-09 1:50 UTC (permalink / raw)
To: will, robin.murphy, joro
Cc: Isaac J. Manjarres, pdaly, linux-kernel, iommu, pratikp,
linux-arm-kernel
Implement the map_sg io-pgtable op for the ARMv7s io-pgtable
code, so that IOMMU drivers can call it when they need to map
a scatter-gather list.
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
---
drivers/iommu/io-pgtable-arm-v7s.c | 90 ++++++++++++++++++++++++++++++++++++++
1 file changed, 90 insertions(+)
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 1d92ac9..40d96d2 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -545,6 +545,95 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
return ret;
}
+static int arm_v7s_map_by_pgsize(struct io_pgtable_ops *ops,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int prot, gfp_t gfp,
+ size_t *mapped)
+{
+ struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable *iop = &data->iop;
+ struct io_pgtable_cfg *cfg = &iop->cfg;
+ unsigned int min_pagesz = 1 << __ffs(cfg->pgsize_bitmap);
+ int ret;
+ size_t pgsize;
+
+ if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+ pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
+ iova, &paddr, size, min_pagesz);
+ return -EINVAL;
+ }
+
+ if (WARN_ON((iova + size) >= (1ULL << cfg->ias) ||
+ (paddr + size) >= (1ULL << cfg->oas)))
+ return -ERANGE;
+
+ while (size) {
+ pgsize = iommu_pgsize(cfg->pgsize_bitmap, iova | paddr, size);
+ ret = __arm_v7s_map(data, iova, paddr, pgsize, prot, 1,
+ data->pgd, gfp);
+
+ if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
+ io_pgtable_tlb_flush_walk(&data->iop, iova, size,
+ ARM_V7S_BLOCK_SIZE(2));
+ } else {
+ wmb();
+ }
+
+ if (ret)
+ return ret;
+
+ iova += pgsize;
+ paddr += pgsize;
+ *mapped += pgsize;
+ size -= pgsize;
+ }
+
+ return 0;
+}
+
+static int arm_v7s_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents,
+ int iommu_prot, gfp_t gfp, size_t *mapped)
+{
+ size_t len = 0;
+ unsigned int i = 0;
+ int ret;
+ phys_addr_t start;
+
+ *mapped = 0;
+
+ /* If no access, then nothing to do */
+ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+ return 0;
+
+ while (i <= nents) {
+ phys_addr_t s_phys = sg_phys(sg);
+
+ if (len && s_phys != start + len) {
+ ret = arm_v7s_map_by_pgsize(ops, iova + *mapped, start,
+ len, iommu_prot, gfp,
+ mapped);
+
+ if (ret)
+ return ret;
+
+ len = 0;
+ }
+
+ if (len) {
+ len += sg->length;
+ } else {
+ len = sg->length;
+ start = s_phys;
+ }
+
+ if (++i < nents)
+ sg = sg_next(sg);
+ }
+
+ return 0;
+}
+
static void arm_v7s_free_pgtable(struct io_pgtable *iop)
{
struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -783,6 +872,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
data->iop.ops = (struct io_pgtable_ops) {
.map = arm_v7s_map,
+ .map_sg = arm_v7s_map_sg,
.unmap = arm_v7s_unmap,
.iova_to_phys = arm_v7s_iova_to_phys,
};
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/5] iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers
2021-01-09 1:50 [PATCH 0/5] Optimize iommu_map_sg() performance Isaac J. Manjarres
` (2 preceding siblings ...)
2021-01-09 1:50 ` [PATCH 3/5] iommu/io-pgtable-arm-v7s: " Isaac J. Manjarres
@ 2021-01-09 1:50 ` Isaac J. Manjarres
2021-01-09 1:50 ` [PATCH 5/5] iommu/arm-smmu: Hook up map_sg() Isaac J. Manjarres
2021-01-11 6:22 ` [PATCH 0/5] Optimize iommu_map_sg() performance Sai Prakash Ranjan
5 siblings, 0 replies; 9+ messages in thread
From: Isaac J. Manjarres @ 2021-01-09 1:50 UTC (permalink / raw)
To: will, robin.murphy, joro
Cc: Isaac J. Manjarres, pdaly, linux-kernel, iommu, pratikp,
linux-arm-kernel
Add support for IOMMU drivers to have their own map_sg() callbacks.
This completes the path for having iommu_map_sg() invoke an IOMMU
driver's map_sg() callback, which can then invoke the io-pgtable
map_sg() callback with the entire scatter-gather list, so that it
can be processed entirely in the io-pgtable layer.
For IOMMU drivers that do not provide a callback, the default
implementation of iterating through the scatter-gather list, while
calling iommu_map() will be used.
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
---
drivers/iommu/iommu.c | 13 +++++++++++++
include/linux/iommu.h | 5 +++++
2 files changed, 18 insertions(+)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0da0687..46acd5c 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2535,11 +2535,24 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
struct scatterlist *sg, unsigned int nents, int prot,
gfp_t gfp)
{
+ const struct iommu_ops *ops = domain->ops;
size_t len = 0, mapped = 0;
phys_addr_t start;
unsigned int i = 0;
int ret;
+ if (ops->map_sg) {
+ ret = ops->map_sg(domain, iova, sg, nents, prot, gfp, &mapped);
+
+ if (ops->iotlb_sync_map)
+ ops->iotlb_sync_map(domain);
+
+ if (ret)
+ goto out_err;
+
+ return mapped;
+ }
+
while (i <= nents) {
phys_addr_t s_phys = sg_phys(sg);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0e40a38..bac7681 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -192,6 +192,8 @@ struct iommu_iotlb_gather {
* @attach_dev: attach device to an iommu domain
* @detach_dev: detach device from an iommu domain
* @map: map a physically contiguous memory region to an iommu domain
+ * @map_sg: map a scatter-gather list of physically contiguous chunks to
+ * an iommu domain.
* @unmap: unmap a physically contiguous memory region from an iommu domain
* @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
* @iotlb_sync_map: Sync mappings created recently using @map to the hardware
@@ -243,6 +245,9 @@ struct iommu_ops {
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
int (*map)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
+ int (*map_sg)(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped);
size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
size_t size, struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 5/5] iommu/arm-smmu: Hook up map_sg()
2021-01-09 1:50 [PATCH 0/5] Optimize iommu_map_sg() performance Isaac J. Manjarres
` (3 preceding siblings ...)
2021-01-09 1:50 ` [PATCH 4/5] iommu: Introduce map_sg() as an IOMMU op for IOMMU drivers Isaac J. Manjarres
@ 2021-01-09 1:50 ` Isaac J. Manjarres
2021-01-11 6:22 ` [PATCH 0/5] Optimize iommu_map_sg() performance Sai Prakash Ranjan
5 siblings, 0 replies; 9+ messages in thread
From: Isaac J. Manjarres @ 2021-01-09 1:50 UTC (permalink / raw)
To: will, robin.murphy, joro
Cc: Isaac J. Manjarres, pdaly, linux-kernel, iommu, pratikp,
linux-arm-kernel
Now that everything is in place for iommu_map_sg() to defer
mapping a scatter-gather list to the io-pgtable layer, implement
the map_sg() callback in the SMMU driver, so that iommu_map_sg()
can invoke it with the entire scatter-gather list that will be
mapped.
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
---
drivers/iommu/arm/arm-smmu/arm-smmu.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index d8c6bfd..52acc68 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1208,6 +1208,24 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
+static int arm_smmu_map_sg(struct iommu_domain *domain, unsigned long iova,
+ struct scatterlist *sg, unsigned int nents, int prot,
+ gfp_t gfp, size_t *mapped)
+{
+ struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ int ret;
+
+ if (!ops)
+ return -ENODEV;
+
+ arm_smmu_rpm_get(smmu);
+ ret = ops->map_sg(ops, iova, sg, nents, prot, gfp, mapped);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
+}
+
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size, struct iommu_iotlb_gather *gather)
{
@@ -1624,6 +1642,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_free = arm_smmu_domain_free,
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
+ .map_sg = arm_smmu_map_sg,
.unmap = arm_smmu_unmap,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 0/5] Optimize iommu_map_sg() performance
2021-01-09 1:50 [PATCH 0/5] Optimize iommu_map_sg() performance Isaac J. Manjarres
` (4 preceding siblings ...)
2021-01-09 1:50 ` [PATCH 5/5] iommu/arm-smmu: Hook up map_sg() Isaac J. Manjarres
@ 2021-01-11 6:22 ` Sai Prakash Ranjan
2021-01-11 7:52 ` Sai Prakash Ranjan
5 siblings, 1 reply; 9+ messages in thread
From: Sai Prakash Ranjan @ 2021-01-11 6:22 UTC (permalink / raw)
To: isaacm
Cc: will, pdaly, robin.murphy, linux-kernel, iommu, pratikp,
linux-arm-kernel
Hi Isaac,
On 2021-01-09 07:20, Isaac J. Manjarres wrote:
> The iommu_map_sg() code currently iterates through the given
> scatter-gather list, and in the worst case, invokes iommu_map()
> for each element in the scatter-gather list, which calls into
> the IOMMU driver through an indirect call. For an IOMMU driver
> that uses a format supported by the io-pgtable code, the IOMMU
> driver will then call into the io-pgtable code to map the chunk.
>
> Jumping between the IOMMU core code, the IOMMU driver, and the
> io-pgtable code and back for each element in a scatter-gather list
> is not efficient.
>
> Instead, add a map_sg() hook in both the IOMMU driver ops and the
> io-pgtable ops. iommu_map_sg() can then call into the IOMMU driver's
> map_sg() hook with the entire scatter-gather list, which can call
> into the io-pgtable map_sg() hook, which can process the entire
> scatter-gather list, signficantly reducing the number of indirect
> calls, and jumps between these layers, boosting performance.
>
> On a system that uses the ARM SMMU driver, and the ARM LPAE format,
> the current implementation of iommu_map_sg() yields the following
> latencies for mapping scatter-gather lists of various sizes. These
> latencies are calculated by repeating the mapping operation 10 times:
>
> size iommu_map_sg latency
> 4K 0.624 us
> 64K 9.468 us
> 1M 122.557 us
> 2M 239.807 us
> 12M 1435.979 us
> 24M 2884.968 us
> 32M 3832.979 us
>
> On the same system, the proposed modifications yield the following
> results:
>
> size iommu_map_sg latency
> 4K 3.645 us
> 64K 4.198 us
> 1M 11.010 us
> 2M 17.125 us
> 12M 82.416 us
> 24M 158.677 us
> 32M 210.468 us
>
> The procedure for collecting the iommu_map_sg latencies is
> the same in both experiments. Clearly, reducing the jumps
> between the different layers in the IOMMU code offers a
> signficant performance boost in iommu_map_sg() latency.
>
I gave this series a go on chromebook and saw these warnings
and several device probe failures, logs attached below:
WARN corresponds to this code in arm_lpae_map_by_pgsize()
if (WARN_ON(iaext || (paddr + size) >> cfg->oas))
return -ERANGE;
Logs:
[ 2.411391] ------------[ cut here ]------------
[ 2.416149] WARNING: CPU: 6 PID: 56 at drivers/iommu/io-pgtable-arm.c:492 arm_lpae_map_sg+0x234/0x248
[ 2.425606] Modules linked in:
[ 2.428749] CPU: 6 PID: 56 Comm: kworker/6:1 Not tainted 5.10.5 #970
[ 2.440287] Workqueue: events deferred_probe_work_func
[ 2.445563] pstate: 20c00009 (nzCv daif +PAN +UAO -TCO BTYPE=--)
[ 2.451726] pc : arm_lpae_map_sg+0x234/0x248
[ 2.456112] lr : arm_lpae_map_sg+0xe0/0x248
[ 2.460410] sp : ffffffc010513750
[ 2.463820] x29: ffffffc010513790 x28: ffffffb943332000
[ 2.469281] x27: 0000000ffffff000 x26: ffffffb943d14900
[ 2.474738] x25: 0000000000001000 x24: 0000000103465000
[ 2.480196] x23: 0000000000000001 x22: 0000000103466000
[ 2.485645] x21: 0000000000000003 x20: 0000000000000a20
[ 2.491103] x19: ffffffc010513850 x18: 0000000000000001
[ 2.496562] x17: 0000000000000002 x16: 00000000ffffffff
[ 2.502021] x15: 0000000000000000 x14: 0000000000000000
[ 2.507479] x13: 0000000000000001 x12: 0000000000000000
[ 2.512928] x11: 0000001000000000 x10: 0000000000000000
[ 2.518385] x9 : 0000000000000001 x8 : 0000000040201000
[ 2.523844] x7 : 0000000000000a20 x6 : ffffffb943463000
[ 2.529302] x5 : 0000000000000003 x4 : 0000000000001000
[ 2.534760] x3 : 0000000000000001 x2 : ffffffb941f605a0
[ 2.540219] x1 : 0000000000000003 x0 : 0000000000000e40
[ 2.545679] Call trace:
[ 2.548196] arm_lpae_map_sg+0x234/0x248
[ 2.552225] arm_smmu_map_sg+0x80/0xc4
[ 2.556078] __iommu_map_sg+0x6c/0x188
[ 2.559931] iommu_map_sg_atomic+0x18/0x20
[ 2.564144] iommu_dma_alloc_remap+0x26c/0x34c
[ 2.568703] iommu_dma_alloc+0x9c/0x268
[ 2.572647] dma_alloc_attrs+0x88/0xfc
[ 2.576503] gsi_ring_alloc+0x50/0x144
[ 2.580356] gsi_init+0x2c4/0x5c4
[ 2.583766] ipa_probe+0x14c/0x2b4
[ 2.587263] platform_drv_probe+0x94/0xb4
[ 2.591377] really_probe+0x138/0x348
[ 2.595145] driver_probe_device+0x80/0xb8
[ 2.599358] __device_attach_driver+0x90/0xa8
[ 2.603829] bus_for_each_drv+0x84/0xcc
[ 2.607772] __device_attach+0xc0/0x148
[ 2.611713] device_initial_probe+0x18/0x20
[ 2.616012] bus_probe_device+0x38/0x94
[ 2.619953] deferred_probe_work_func+0x78/0xb0
[ 2.624611] process_one_work+0x210/0x3dc
[ 2.628726] worker_thread+0x284/0x3e0
[ 2.632578] kthread+0x148/0x1a8
[ 2.635891] ret_from_fork+0x10/0x18
[ 2.639562] ---[ end trace 9bac18cad6a9862e ]---
[ 2.644414] ipa 1e40000.ipa: error -12 allocating channel 0 event ring
[ 2.651656] ipa: probe of 1e40000.ipa failed with error -12
[ 2.660072] dwc3 a600000.dwc3: Adding to iommu group 8
[ 2.668632] xhci-hcd xhci-hcd.13.auto: xHCI Host Controller
[ 2.674680] xhci-hcd xhci-hcd.13.auto: new USB bus registered, assigned bus number 1
[ 2.683638] ------------[ cut here ]------------
[ 2.688391] WARNING: CPU: 6 PID: 56 at drivers/iommu/io-pgtable-arm.c:492 arm_lpae_map_sg+0x234/0x248
[ 2.697846] Modules linked in:
[ 2.700988] CPU: 6 PID: 56 Comm: kworker/6:1 Tainted: G W 5.10.5 #970
[ 2.713954] Workqueue: events deferred_probe_work_func
[ 2.719228] pstate: 20c00009 (nzCv daif +PAN +UAO -TCO BTYPE=--)
[ 2.725390] pc : arm_lpae_map_sg+0x234/0x248
[ 2.729775] lr : arm_lpae_map_sg+0xe0/0x248
[ 2.734073] sp : ffffffc010512e20
[ 2.737483] x29: ffffffc010512e60 x28: ffffffb94345e000
[ 2.742942] x27: 0000000ffffff000 x26: ffffffb941fa1500
[ 2.748400] x25: 0000000000001000 x24: 0000000103468000
[ 2.753858] x23: 0000000000000001 x22: 0000000103469000
[ 2.759318] x21: 0000000000000003 x20: 0000000000000a20
[ 2.764777] x19: ffffffc010512f20 x18: 0000000000000001
[ 2.770235] x17: 0000000000000001 x16: 00000000ffffffff
[ 2.775694] x15: 0000000000000000 x14: 0000000000000001
[ 2.781154] x13: 0000000000000001 x12: 0000000000000000
[ 2.786613] x11: 0000001000000000 x10: 0000000000000000
[ 2.792071] x9 : 0000000000000001 x8 : 0000000040201000
[ 2.797529] x7 : 0000000000000000 x6 : ffffffc010512f20
[ 2.802988] x5 : 0000000000000a20 x4 : 0000000000001000
[ 2.808438] x3 : 0000000000000001 x2 : ffffffb9457d6100
[ 2.813896] x1 : 0000000000000003 x0 : 0000000000000e40
[ 2.819356] Call trace:
[ 2.821878] arm_lpae_map_sg+0x234/0x248
[ 2.825907] arm_smmu_map_sg+0x80/0xc4
[ 2.829761] __iommu_map_sg+0x6c/0x188
[ 2.833615] iommu_map_sg_atomic+0x18/0x20
[ 2.837827] iommu_dma_alloc_remap+0x26c/0x34c
[ 2.842386] iommu_dma_alloc+0x9c/0x268
[ 2.846329] dma_alloc_attrs+0x88/0xfc
[ 2.850184] xhci_mem_init+0x200/0x930
[ 2.854037] xhci_init+0xc0/0xec
[ 2.857350] xhci_gen_setup+0x270/0x348
[ 2.861292] xhci_plat_setup+0x4c/0x58
[ 2.865146] usb_add_hcd+0x288/0x430
[ 2.868815] xhci_plat_probe+0x3f8/0x568
[ 2.872844] platform_drv_probe+0x94/0xb4
[ 2.876957] really_probe+0x138/0x348
[ 2.880725] driver_probe_device+0x80/0xb8
[ 2.884936] __device_attach_driver+0x90/0xa8
[ 2.889407] bus_for_each_drv+0x84/0xcc
[ 2.893348] __device_attach+0xc0/0x148
[ 2.897289] device_initial_probe+0x18/0x20
[ 2.901587] bus_probe_device+0x38/0x94
[ 2.905529] device_add+0x214/0x3c4
[ 2.909112] platform_device_add+0x198/0x208
[ 2.913497] dwc3_host_init+0x228/0x2bc
[ 2.917437] dwc3_core_init_mode+0xfc/0x18c
[ 2.921735] dwc3_probe+0x978/0xac8
[ 2.925318] platform_drv_probe+0x94/0xb4
[ 2.929432] really_probe+0x138/0x348
[ 2.933200] driver_probe_device+0x80/0xb8
[ 2.937411] __device_attach_driver+0x90/0xa8
[ 2.941881] bus_for_each_drv+0x84/0xcc
[ 2.945823] __device_attach+0xc0/0x148
[ 2.949765] device_initial_probe+0x18/0x20
[ 2.954064] bus_probe_device+0x38/0x94
[ 2.958005] device_add+0x214/0x3c4
[ 2.961590] of_device_add+0x3c/0x48
[ 2.965260] of_platform_device_create_pdata+0xac/0xec
[ 2.970535] of_platform_bus_create+0x1cc/0x348
[ 2.975191] of_platform_populate+0x78/0xc8
[ 2.979490] dwc3_qcom_probe+0x4e0/0xa88
[ 2.983518] platform_drv_probe+0x94/0xb4
[ 2.987632] really_probe+0x138/0x348
[ 2.991400] driver_probe_device+0x80/0xb8
[ 2.995612] __device_attach_driver+0x90/0xa8
[ 3.000084] bus_for_each_drv+0x84/0xcc
[ 3.004025] __device_attach+0xc0/0x148
[ 3.007966] device_initial_probe+0x18/0x20
[ 3.012264] bus_probe_device+0x38/0x94
[ 3.016206] deferred_probe_work_func+0x78/0xb0
[ 3.020864] process_one_work+0x210/0x3dc
[ 3.024979] worker_thread+0x284/0x3e0
[ 3.028833] kthread+0x148/0x1a8
[ 3.032147] ret_from_fork+0x10/0x18
[ 3.035817] ---[ end trace 9bac18cad6a9862f ]---
[ 3.041583] xhci-hcd xhci-hcd.13.auto: can't setup: -12
[ 3.046950] xhci-hcd xhci-hcd.13.auto: USB bus 1 deregistered
[ 3.053107] xhci-hcd: probe of xhci-hcd.13.auto failed with error -12
[ 3.062208] coresight etm0: CPU0: ETM v4.2 initialized
[ 3.063345] sdhci_msm 7c4000.sdhci: TCXO clk not present (-2)
[ 3.067862] coresight etm1: CPU1: ETM v4.2 initialized
[ 3.078829] ------------[ cut here ]------------
[ 3.079076] coresight etm2: CPU2: ETM v4.2 initialized
[ 3.083587] WARNING: CPU: 5 PID: 7 at drivers/iommu/io-pgtable-arm.c:492 arm_lpae_map_sg+0x234/0x248
[ 3.083589] Modules linked in:
[ 3.089200] coresight etm3: CPU3: ETM v4.2 initialized
[ 3.098235]
[ 3.098241] CPU: 5 PID: 7 Comm: kworker/u16:0 Tainted: G W 5.10.5 #970
[ 3.101722] coresight etm4: CPU4: ETM v4.2 initialized
[ 3.106672] Workqueue: events_unbound async_run_entry_fn
[ 3.131984] pstate: 20c00009 (nzCv daif +PAN +UAO -TCO BTYPE=--)
[ 3.138161] pc : arm_lpae_map_sg+0x234/0x248
[ 3.142557] lr : arm_lpae_map_sg+0xe0/0x248
[ 3.146864] sp : ffffffc0100c3800
[ 3.150272] x29: ffffffc0100c3840 x28: ffffffb942394000
[ 3.155736] x27: 0000000ffffff000 x26: ffffffb944010100
[ 3.161198] x25: 0000000000001000 x24: 0000000102326000
[ 3.166660] x23: 0000000000000001 x22: 0000000102327000
[ 3.172122] x21: 0000000000000003 x20: 0000000000000a20
[ 3.177586] x19: ffffffc0100c3900 x18: 0000000000000001
[ 3.183048] x17: 0000000000000001 x16: 00000000ffffffff
[ 3.188512] x15: 0000000000000000 x14: 0000000000000001
[ 3.193973] x13: 0000000000000001 x12: 0000000000000000
[ 3.199434] x11: 0000001000000000 x10: 0000000000000000
[ 3.204895] x9 : 0000000000000001 x8 : 0000000040201000
[ 3.210356] x7 : 0000000000000000 x6 : ffffffc0100c3900
[ 3.215818] x5 : 0000000000000a20 x4 : 0000000000001000
[ 3.221279] x3 : 0000000000000001 x2 : ffffffb94581a100
[ 3.226741] x1 : 0000000000000003 x0 : 0000000000000e40
[ 3.232203] Call trace:
[ 3.234722] arm_lpae_map_sg+0x234/0x248
[ 3.238761] arm_smmu_map_sg+0x80/0xc4
[ 3.242615] __iommu_map_sg+0x6c/0x188
[ 3.246468] iommu_map_sg_atomic+0x18/0x20
[ 3.250680] iommu_dma_alloc_remap+0x26c/0x34c
[ 3.255250] iommu_dma_alloc+0x9c/0x268
[ 3.259204] dma_alloc_attrs+0x88/0xfc
[ 3.263058] sdhci_setup_host+0x250/0xc8c
[ 3.267185] sdhci_msm_cqe_add_host+0x38/0x188
[ 3.271756] sdhci_msm_probe+0x540/0x628
[ 3.275795] platform_drv_probe+0x94/0xb4
[ 3.279921] really_probe+0x138/0x348
[ 3.283687] driver_probe_device+0x80/0xb8
[ 3.287899] __device_attach_driver+0x90/0xa8
[ 3.292381] bus_for_each_drv+0x84/0xcc
[ 3.296332] __device_attach_async_helper+0x80/0xd4
[ 3.301347] async_run_entry_fn+0x4c/0x100
[ 3.305559] process_one_work+0x210/0x3dc
[ 3.309684] worker_thread+0x234/0x3e0
[ 3.313537] kthread+0x148/0x1a8
[ 3.316861] ret_from_fork+0x10/0x18
[ 3.320541] ---[ end trace 9bac18cad6a98630 ]---
[ 3.325372] mmc1: Unable to allocate ADMA buffers - falling back to standard DMA
<snip>...
[ 3.587535] mmc1: running CQE recovery
[ 3.591419] mmc1: Got command interrupt 0x00010001 even though no command operation was in progress.
[ 3.600796] mmc1: sdhci: ============ SDHCI REGISTER DUMP ===========
[ 3.607417] mmc1: sdhci: Sys addr: 0x00000000 | Version: 0x00007202
[ 3.614038] mmc1: sdhci: Blk size: 0x00000200 | Blk cnt: 0x00000108
Thanks,
Sai
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/5] Optimize iommu_map_sg() performance
2021-01-11 6:22 ` [PATCH 0/5] Optimize iommu_map_sg() performance Sai Prakash Ranjan
@ 2021-01-11 7:52 ` Sai Prakash Ranjan
2021-01-11 15:12 ` isaacm
0 siblings, 1 reply; 9+ messages in thread
From: Sai Prakash Ranjan @ 2021-01-11 7:52 UTC (permalink / raw)
To: isaacm
Cc: will, pdaly, robin.murphy, linux-kernel, iommu, pratikp,
linux-arm-kernel
On 2021-01-11 11:52, Sai Prakash Ranjan wrote:
> Hi Isaac,
>
> I gave this series a go on chromebook and saw these warnings
> and several device probe failures, logs attached below:
>
> WARN corresponds to this code in arm_lpae_map_by_pgsize()
>
> if (WARN_ON(iaext || (paddr + size) >> cfg->oas))
> return -ERANGE;
>
> Logs:
>
> [ 2.411391] ------------[ cut here ]------------
> [ 2.416149] WARNING: CPU: 6 PID: 56 at
> drivers/iommu/io-pgtable-arm.c:492 arm_lpae_map_sg+0x234/0x248
> [ 2.425606] Modules linked in:
> [ 2.428749] CPU: 6 PID: 56 Comm: kworker/6:1 Not tainted 5.10.5 #970
> [ 2.440287] Workqueue: events deferred_probe_work_func
> [ 2.445563] pstate: 20c00009 (nzCv daif +PAN +UAO -TCO BTYPE=--)
> [ 2.451726] pc : arm_lpae_map_sg+0x234/0x248
> [ 2.456112] lr : arm_lpae_map_sg+0xe0/0x248
> [ 2.460410] sp : ffffffc010513750
> [ 2.463820] x29: ffffffc010513790 x28: ffffffb943332000
> [ 2.469281] x27: 0000000ffffff000 x26: ffffffb943d14900
> [ 2.474738] x25: 0000000000001000 x24: 0000000103465000
> [ 2.480196] x23: 0000000000000001 x22: 0000000103466000
> [ 2.485645] x21: 0000000000000003 x20: 0000000000000a20
> [ 2.491103] x19: ffffffc010513850 x18: 0000000000000001
> [ 2.496562] x17: 0000000000000002 x16: 00000000ffffffff
> [ 2.502021] x15: 0000000000000000 x14: 0000000000000000
> [ 2.507479] x13: 0000000000000001 x12: 0000000000000000
> [ 2.512928] x11: 0000001000000000 x10: 0000000000000000
> [ 2.518385] x9 : 0000000000000001 x8 : 0000000040201000
> [ 2.523844] x7 : 0000000000000a20 x6 : ffffffb943463000
> [ 2.529302] x5 : 0000000000000003 x4 : 0000000000001000
> [ 2.534760] x3 : 0000000000000001 x2 : ffffffb941f605a0
> [ 2.540219] x1 : 0000000000000003 x0 : 0000000000000e40
> [ 2.545679] Call trace:
> [ 2.548196] arm_lpae_map_sg+0x234/0x248
> [ 2.552225] arm_smmu_map_sg+0x80/0xc4
> [ 2.556078] __iommu_map_sg+0x6c/0x188
> [ 2.559931] iommu_map_sg_atomic+0x18/0x20
> [ 2.564144] iommu_dma_alloc_remap+0x26c/0x34c
> [ 2.568703] iommu_dma_alloc+0x9c/0x268
> [ 2.572647] dma_alloc_attrs+0x88/0xfc
> [ 2.576503] gsi_ring_alloc+0x50/0x144
> [ 2.580356] gsi_init+0x2c4/0x5c4
> [ 2.583766] ipa_probe+0x14c/0x2b4
> [ 2.587263] platform_drv_probe+0x94/0xb4
> [ 2.591377] really_probe+0x138/0x348
> [ 2.595145] driver_probe_device+0x80/0xb8
> [ 2.599358] __device_attach_driver+0x90/0xa8
> [ 2.603829] bus_for_each_drv+0x84/0xcc
> [ 2.607772] __device_attach+0xc0/0x148
> [ 2.611713] device_initial_probe+0x18/0x20
> [ 2.616012] bus_probe_device+0x38/0x94
> [ 2.619953] deferred_probe_work_func+0x78/0xb0
> [ 2.624611] process_one_work+0x210/0x3dc
> [ 2.628726] worker_thread+0x284/0x3e0
> [ 2.632578] kthread+0x148/0x1a8
> [ 2.635891] ret_from_fork+0x10/0x18
> [ 2.639562] ---[ end trace 9bac18cad6a9862e ]---
> [ 2.644414] ipa 1e40000.ipa: error -12 allocating channel 0 event
> ring
> [ 2.651656] ipa: probe of 1e40000.ipa failed with error -12
> [ 2.660072] dwc3 a600000.dwc3: Adding to iommu group 8
> [ 2.668632] xhci-hcd xhci-hcd.13.auto: xHCI Host Controller
> [ 2.674680] xhci-hcd xhci-hcd.13.auto: new USB bus registered,
> assigned bus number 1
>
<snip>...
Isaac provided a fix which he will post as v2 and no warnings were
observed
with that fix.
Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Thanks,
Sai
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a
member
of Code Aurora Forum, hosted by The Linux Foundation
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/5] Optimize iommu_map_sg() performance
2021-01-11 7:52 ` Sai Prakash Ranjan
@ 2021-01-11 15:12 ` isaacm
0 siblings, 0 replies; 9+ messages in thread
From: isaacm @ 2021-01-11 15:12 UTC (permalink / raw)
To: Sai Prakash Ranjan
Cc: will, pdaly, robin.murphy, linux-kernel, iommu, pratikp,
linux-arm-kernel
On 2021-01-10 23:52, Sai Prakash Ranjan wrote:
> On 2021-01-11 11:52, Sai Prakash Ranjan wrote:
>> Hi Isaac,
>>
>> I gave this series a go on chromebook and saw these warnings
>> and several device probe failures, logs attached below:
>>
>> WARN corresponds to this code in arm_lpae_map_by_pgsize()
>>
>> if (WARN_ON(iaext || (paddr + size) >> cfg->oas))
>> return -ERANGE;
>>
>> Logs:
>>
>> [ 2.411391] ------------[ cut here ]------------
>> [ 2.416149] WARNING: CPU: 6 PID: 56 at
>> drivers/iommu/io-pgtable-arm.c:492 arm_lpae_map_sg+0x234/0x248
>> [ 2.425606] Modules linked in:
>> [ 2.428749] CPU: 6 PID: 56 Comm: kworker/6:1 Not tainted 5.10.5
>> #970
>> [ 2.440287] Workqueue: events deferred_probe_work_func
>> [ 2.445563] pstate: 20c00009 (nzCv daif +PAN +UAO -TCO BTYPE=--)
>> [ 2.451726] pc : arm_lpae_map_sg+0x234/0x248
>> [ 2.456112] lr : arm_lpae_map_sg+0xe0/0x248
>> [ 2.460410] sp : ffffffc010513750
>> [ 2.463820] x29: ffffffc010513790 x28: ffffffb943332000
>> [ 2.469281] x27: 0000000ffffff000 x26: ffffffb943d14900
>> [ 2.474738] x25: 0000000000001000 x24: 0000000103465000
>> [ 2.480196] x23: 0000000000000001 x22: 0000000103466000
>> [ 2.485645] x21: 0000000000000003 x20: 0000000000000a20
>> [ 2.491103] x19: ffffffc010513850 x18: 0000000000000001
>> [ 2.496562] x17: 0000000000000002 x16: 00000000ffffffff
>> [ 2.502021] x15: 0000000000000000 x14: 0000000000000000
>> [ 2.507479] x13: 0000000000000001 x12: 0000000000000000
>> [ 2.512928] x11: 0000001000000000 x10: 0000000000000000
>> [ 2.518385] x9 : 0000000000000001 x8 : 0000000040201000
>> [ 2.523844] x7 : 0000000000000a20 x6 : ffffffb943463000
>> [ 2.529302] x5 : 0000000000000003 x4 : 0000000000001000
>> [ 2.534760] x3 : 0000000000000001 x2 : ffffffb941f605a0
>> [ 2.540219] x1 : 0000000000000003 x0 : 0000000000000e40
>> [ 2.545679] Call trace:
>> [ 2.548196] arm_lpae_map_sg+0x234/0x248
>> [ 2.552225] arm_smmu_map_sg+0x80/0xc4
>> [ 2.556078] __iommu_map_sg+0x6c/0x188
>> [ 2.559931] iommu_map_sg_atomic+0x18/0x20
>> [ 2.564144] iommu_dma_alloc_remap+0x26c/0x34c
>> [ 2.568703] iommu_dma_alloc+0x9c/0x268
>> [ 2.572647] dma_alloc_attrs+0x88/0xfc
>> [ 2.576503] gsi_ring_alloc+0x50/0x144
>> [ 2.580356] gsi_init+0x2c4/0x5c4
>> [ 2.583766] ipa_probe+0x14c/0x2b4
>> [ 2.587263] platform_drv_probe+0x94/0xb4
>> [ 2.591377] really_probe+0x138/0x348
>> [ 2.595145] driver_probe_device+0x80/0xb8
>> [ 2.599358] __device_attach_driver+0x90/0xa8
>> [ 2.603829] bus_for_each_drv+0x84/0xcc
>> [ 2.607772] __device_attach+0xc0/0x148
>> [ 2.611713] device_initial_probe+0x18/0x20
>> [ 2.616012] bus_probe_device+0x38/0x94
>> [ 2.619953] deferred_probe_work_func+0x78/0xb0
>> [ 2.624611] process_one_work+0x210/0x3dc
>> [ 2.628726] worker_thread+0x284/0x3e0
>> [ 2.632578] kthread+0x148/0x1a8
>> [ 2.635891] ret_from_fork+0x10/0x18
>> [ 2.639562] ---[ end trace 9bac18cad6a9862e ]---
>> [ 2.644414] ipa 1e40000.ipa: error -12 allocating channel 0 event
>> ring
>> [ 2.651656] ipa: probe of 1e40000.ipa failed with error -12
>> [ 2.660072] dwc3 a600000.dwc3: Adding to iommu group 8
>> [ 2.668632] xhci-hcd xhci-hcd.13.auto: xHCI Host Controller
>> [ 2.674680] xhci-hcd xhci-hcd.13.auto: new USB bus registered,
>> assigned bus number 1
>>
>
> <snip>...
>
> Isaac provided a fix which he will post as v2 and no warnings were
> observed
> with that fix.
>
> Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
>
> Thanks,
> Sai
Thanks for testing out the patches. I've added the fix (there was an
off-by-one error in the calculation
used to check if the IOVA/physical addresses are within limits) to
version 2 of the series:
https://lore.kernel.org/linux-iommu/1610376862-927-1-git-send-email-isaacm@codeaurora.org/T/#t
Thanks,
Isaac
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu
^ permalink raw reply [flat|nested] 9+ messages in thread