* [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-25  5:31 ` Oza Pawandeep via iommu
From: Oza Pawandeep @ 2017-03-25  5:31 UTC (permalink / raw)
  To: Joerg Roedel, Robin Murphy
  Cc: iommu, linux-pci, linux-kernel, linux-arm-kernel, devicetree,
	bcm-kernel-feedback-list, Oza Pawandeep

It is possible that a PCI device supports 64-bit DMA addressing, and
thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
however, the PCI host bridge may have limitations on inbound
transaction addressing. As an example, consider an NVMe SSD
connected to the iproc-PCIe controller.

Currently, the IOMMU DMA ops only consider the PCI device's dma_mask
when allocating an IOVA. This is particularly problematic on
ARM/ARM64 SoCs where the IOMMU (i.e. SMMU) translates IOVA to
PA for inbound transactions only after the PCI host has forwarded
these transactions onto the SoC I/O bus. This means that on such
ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
addressing restrictions of the PCI host.

The current integration of the PCIe framework and the OF framework
assumes that memory-mapped devices define their dma-ranges as:
dma-ranges: (child-bus-address, parent-bus-address, length).

But iproc-based SoCs, and even R-Car-based SoCs, have PCI-style dma-ranges:
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;

of_dma_configure() is written specifically to take care of
memory-mapped devices, but no implementation exists for PCI to take
care of PCIe-based memory ranges; in fact, the PCI world does not
seem to define a standard dma-ranges layout.

This patch implements of_pci_get_dma_ranges() to cater to PCI-style
dma-ranges, so that the returned size yields the best possible
(largest) dma_mask. For example, for
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
we should get dev->coherent_dma_mask = 0x7fffffffff.
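
As an illustrative sketch (not part of the patch), this is roughly the
mask derivation that of_dma_configure() performs on the values returned
by of_pci_get_dma_ranges(), ignoring for clarity the clamping against
the mask already set by the driver; the surrounding variables are
hypothetical:

	u64 dma_addr, paddr, size;

	/* returns the largest dma-ranges entry found on the node */
	if (!of_pci_get_dma_ranges(np, &dma_addr, &paddr, &size)) {
		/*
		 * With dma_addr = 0 and size = 0x8000000000,
		 * ilog2(dma_addr + size) = 39, so the resulting mask
		 * is DMA_BIT_MASK(39) = 0x7fffffffff.
		 */
		dev->coherent_dma_mask = DMA_BIT_MASK(ilog2(dma_addr + size));
	}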

Reviewed-by: Anup Patel <anup.patel@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>

diff --git a/drivers/of/device.c b/drivers/of/device.c
index b1e6beb..d362a98 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
 #include <linux/slab.h>
+#include <linux/of_pci.h>
 
 #include <asm/errno.h>
 #include "of_private.h"
@@ -104,7 +105,11 @@ void of_dma_configure(struct device *dev, struct device_node *np)
 	if (!dev->dma_mask)
 		dev->dma_mask = &dev->coherent_dma_mask;
 
-	ret = of_dma_get_range(np, &dma_addr, &paddr, &size);
+	if (dev_is_pci(dev))
+		ret = of_pci_get_dma_ranges(np, &dma_addr, &paddr, &size);
+	else
+		ret = of_dma_get_range(np, &dma_addr, &paddr, &size);
+
 	if (ret < 0) {
 		dma_addr = offset = 0;
 		size = dev->coherent_dma_mask + 1;
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 0ee42c3..c7f8626 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -283,6 +283,49 @@ int of_pci_get_host_bridge_resources(struct device_node *dev,
 	return err;
 }
 EXPORT_SYMBOL_GPL(of_pci_get_host_bridge_resources);
+
+int of_pci_get_dma_ranges(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size)
+{
+	struct device_node *node = of_node_get(np);
+	int rlen, ret = 0;
+	const int na = 3, ns = 2;
+	struct of_pci_range_parser parser;
+	struct of_pci_range range;
+
+	if (!node)
+		return -EINVAL;
+
+	parser.node = node;
+	parser.pna = of_n_addr_cells(node);
+	parser.np = parser.pna + na + ns;
+
+	parser.range = of_get_property(node, "dma-ranges", &rlen);
+
+	if (!parser.range) {
+		pr_debug("pcie device has no dma-ranges defined for node(%s)\n", np->full_name);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	parser.end = parser.range + rlen / sizeof(__be32);
+	*size = 0;
+
+	for_each_of_pci_range(&parser, &range) {
+		if (*size < range.size) {
+			*dma_addr = range.pci_addr;
+			*size = range.size;
+			*paddr = range.cpu_addr;
+		}
+	}
+
+	pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
+		 *dma_addr, *paddr, *size);
+
+out:
+	of_node_put(node);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_dma_ranges);
 #endif /* CONFIG_OF_ADDRESS */
 
 #ifdef CONFIG_PCI_MSI
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 0e0974e..907ace0 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -76,6 +76,7 @@ static inline void of_pci_check_probe_only(void) { }
 int of_pci_get_host_bridge_resources(struct device_node *dev,
 			unsigned char busno, unsigned char bus_max,
 			struct list_head *resources, resource_size_t *io_base);
+int of_pci_get_dma_ranges(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size);
 #else
 static inline int of_pci_get_host_bridge_resources(struct device_node *dev,
 			unsigned char busno, unsigned char bus_max,
@@ -83,6 +84,11 @@ static inline int of_pci_get_host_bridge_resources(struct device_node *dev,
 {
 	return -EINVAL;
 }
+
+static inline int of_pci_get_dma_ranges(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size)
+{
+	return -EINVAL;
+}
 #endif
 
 #if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI)
-- 
1.9.1

* [RFC PATCH 2/3] iommu/dma: account pci host bridge dma_mask for IOVA allocation
@ 2017-03-25  5:31   ` Oza Pawandeep via iommu
From: Oza Pawandeep @ 2017-03-25  5:31 UTC (permalink / raw)
  To: Joerg Roedel, Robin Murphy
  Cc: iommu, linux-pci, linux-kernel, linux-arm-kernel, devicetree,
	bcm-kernel-feedback-list, Oza Pawandeep

It is possible that a PCI device supports 64-bit DMA addressing, and
thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
however, the PCI host bridge may have limitations on inbound
transaction addressing. As an example, consider an NVMe SSD
connected to the iproc-PCIe controller.

Currently, the IOMMU DMA ops only consider the PCI device's dma_mask
when allocating an IOVA. This is particularly problematic on
ARM/ARM64 SoCs where the IOMMU (i.e. SMMU) translates IOVA to
PA for inbound transactions only after the PCI host has forwarded
these transactions onto the SoC I/O bus. This means that on such
ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
addressing restrictions of the PCI host.

This patch is inspired by
http://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1306545.html
http://www.spinics.net/lists/arm-kernel/msg566947.html

But the above inspiration solves only half of the problem. The rest
of the problem, described below, is what we face on iproc-based SoCs.

The current integration of the PCIe framework and the OF framework
assumes that memory-mapped devices define their dma-ranges as:
dma-ranges: (child-bus-address, parent-bus-address, length).

But iproc-based SoCs, and even R-Car-based SoCs, have PCI-style dma-ranges:
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;

of_dma_configure() is written specifically to take care of
memory-mapped devices, but no implementation exists for PCI to take
care of PCIe-based memory ranges; in fact, the PCI world does not
seem to define a standard dma-ranges layout.

This patch implements of_pci_get_dma_ranges() to cater to PCI-style
dma-ranges, so that the returned size yields the best possible
(largest) dma_mask. For example, for
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
we should get dev->coherent_dma_mask = 0x7fffffffff.
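
As an illustrative sketch (not part of the patch), this is the effect
of the new set_dma_mask hook when a driver requests a 64-bit mask; the
39-bit limit is an assumption carried over from the dma-ranges example
above:

	/*
	 * arch_setup_dma_ops() has recorded the host bridge limit:
	 * dev->archdata.parent_dma_mask = size - 1 = 0x7fffffffff.
	 */
	dma_set_mask(dev, DMA_BIT_MASK(64));
	/* __iommu_set_dma_mask() clamps: *dev->dma_mask == 0x7fffffffff */

	dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
	/* likewise clamped: dev->coherent_dma_mask == 0x7fffffffff */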

Reviewed-by: Anup Patel <anup.patel@broadcom.com>
Reviewed-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8c7c244..20cfff7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -217,6 +217,9 @@ config NEED_DMA_MAP_STATE
 config NEED_SG_DMA_LENGTH
 	def_bool y
 
+config ARCH_HAS_DMA_SET_COHERENT_MASK
+	def_bool y
+
 config SMP
 	def_bool y
 
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h
index 73d5bab..64b4dc3 100644
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -20,6 +20,7 @@ struct dev_archdata {
 #ifdef CONFIG_IOMMU_API
 	void *iommu;			/* private IOMMU data */
 #endif
+	u64 parent_dma_mask;
 	bool dma_coherent;
 };
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 81cdb2e..5845ecd 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -564,6 +564,7 @@ static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
 	__dma_flush_area(virt, PAGE_SIZE);
 }
 
+
 static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 				 dma_addr_t *handle, gfp_t gfp,
 				 unsigned long attrs)
@@ -795,6 +796,20 @@ static void __iommu_unmap_sg_attrs(struct device *dev,
 	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
 }
 
+static int __iommu_set_dma_mask(struct device *dev, u64 mask)
+{
+	/* device is not DMA capable */
+	if (!dev->dma_mask)
+		return -EIO;
+
+	if (mask > dev->archdata.parent_dma_mask)
+		mask = dev->archdata.parent_dma_mask;
+
+	*dev->dma_mask = mask;
+
+	return 0;
+}
+
 static const struct dma_map_ops iommu_dma_ops = {
 	.alloc = __iommu_alloc_attrs,
 	.free = __iommu_free_attrs,
@@ -811,8 +826,21 @@ static void __iommu_unmap_sg_attrs(struct device *dev,
 	.map_resource = iommu_dma_map_resource,
 	.unmap_resource = iommu_dma_unmap_resource,
 	.mapping_error = iommu_dma_mapping_error,
+	.set_dma_mask = __iommu_set_dma_mask,
 };
 
+int dma_set_coherent_mask(struct device *dev, u64 mask)
+{
+	if (get_dma_ops(dev) == &iommu_dma_ops &&
+	    mask > dev->archdata.parent_dma_mask)
+		mask = dev->archdata.parent_dma_mask;
+
+	dev->coherent_dma_mask = mask;
+	return 0;
+}
+EXPORT_SYMBOL(dma_set_coherent_mask);
+
+
 /*
  * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
  * everything it needs to - the device is only partially created and the
@@ -975,6 +1003,8 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	if (!dev->dma_ops)
 		dev->dma_ops = &swiotlb_dma_ops;
 
+	dev->archdata.parent_dma_mask = size - 1;
+
 	dev->archdata.dma_coherent = coherent;
 	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
 }
diff --git a/drivers/of/device.c b/drivers/of/device.c
index d362a98..471dcdf 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -139,10 +139,8 @@ void of_dma_configure(struct device *dev, struct device_node *np)
 	 * Limit coherent and dma mask based on size and default mask
 	 * set by the driver.
 	 */
-	dev->coherent_dma_mask = min(dev->coherent_dma_mask,
-				     DMA_BIT_MASK(ilog2(dma_addr + size)));
-	*dev->dma_mask = min((*dev->dma_mask),
-			     DMA_BIT_MASK(ilog2(dma_addr + size)));
+	dev->coherent_dma_mask = DMA_BIT_MASK(ilog2(dma_addr + size));
+	*dev->dma_mask = dev->coherent_dma_mask;
 
 	coherent = of_dma_is_coherent(np);
 	dev_dbg(dev, "device is%sdma coherent\n",
-- 
1.9.1

* [RFC PATCH 3/3] of: fix node traversing in of_dma_get_range
@ 2017-03-25  5:31   ` Oza Pawandeep via iommu
From: Oza Pawandeep @ 2017-03-25  5:31 UTC (permalink / raw)
  To: Joerg Roedel, Robin Murphy
  Cc: iommu, linux-pci, linux-kernel, linux-arm-kernel, devicetree,
	bcm-kernel-feedback-list, Oza Pawandeep

of_dma_get_range() jumps to the parent node without examining the
child node itself; as a result, it throws "no dma-ranges found for
node" for PCI dma-ranges.

This patch fixes the device node traversal for dma-ranges.
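
As an illustrative sketch (not part of the patch), the lookup order
after this change becomes roughly the following; the context lines
elided from the diff are assumed to match the surrounding
of_dma_get_range() code:

	while (1) {
		naddr = of_n_addr_cells(node);
		nsize = of_n_size_cells(node);

		/* examine the current node first (e.g. a PCI host bridge) */
		ranges = of_get_property(node, "dma-ranges", &len);
		if (ranges && len > 0)
			break;	/* found a usable dma-ranges */
		if (!ranges)
			break;	/* nothing defined: report the failure */

		/* only an empty dma-ranges sends us up to the parent */
		node = of_get_next_parent(node);
		if (!node)
			break;
	}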

Reviewed-by: Anup Patel <anup.patel@broadcom.com>
Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 02b2903..3293d55 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -836,9 +836,6 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz
 	while (1) {
 		naddr = of_n_addr_cells(node);
 		nsize = of_n_size_cells(node);
-		node = of_get_next_parent(node);
-		if (!node)
-			break;
 
 		ranges = of_get_property(node, "dma-ranges", &len);
 
@@ -852,6 +849,10 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz
 		 */
 		if (!ranges)
 			break;
+
+		node = of_get_next_parent(node);
+		if (!node)
+			break;
 	}
 
 	if (!ranges) {
-- 
1.9.1

* Re: [RFC PATCH 3/3] of: fix node traversing in of_dma_get_range
@ 2017-03-27 14:34     ` Rob Herring
From: Rob Herring @ 2017-03-27 14:34 UTC (permalink / raw)
  To: Oza Pawandeep
  Cc: Joerg Roedel, Robin Murphy, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
> it jumps to the parent node without examining the child node.
> also with that, it throws "no dma-ranges found for node"
> for pci dma-ranges.
>
> this patch fixes device node traversing for dma-ranges.

What's the DT look like that doesn't work?

dma-ranges is supposed to be a bus property, not a device's property.
So looking at the parent is correct behavior generally.

Rob

* Re: [RFC PATCH 3/3] of: fix node traversing in of_dma_get_range
@ 2017-03-27 14:45       ` Robin Murphy
From: Robin Murphy @ 2017-03-27 14:45 UTC (permalink / raw)
  To: Rob Herring, Oza Pawandeep
  Cc: Joerg Roedel, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

Hi Rob,

On 27/03/17 15:34, Rob Herring wrote:
> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>> it jumps to the parent node without examining the child node.
>> also with that, it throws "no dma-ranges found for node"
>> for pci dma-ranges.
>>
>> this patch fixes device node traversing for dma-ranges.
> 
> What's the DT look like that doesn't work?

The problem is the bodge in pci_dma_configure() where we don't have an
OF node for the actual device itself, so pass in the host bridge's OF
node instead. This happens to work well enough for dma-coherent, but I
don't think dma-ranges was even considered at the time.
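
For reference, a simplified sketch of the bodge being described; this
is an assumption-laden reduction, not the exact pci_dma_configure()
source:

	static void pci_dma_configure(struct pci_dev *dev)
	{
		struct device *bridge = pci_get_host_bridge_device(dev);

		/*
		 * There is no OF node for the PCI device itself, so the
		 * host bridge's OF node is passed in instead.
		 */
		if (bridge->parent && bridge->parent->of_node)
			of_dma_configure(&dev->dev, bridge->parent->of_node);

		pci_put_host_bridge_device(bridge);
	}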

As it happens I'm currently halfway through writing an experiment
wherein pci_dma_configure() creates a temporary child node for the
of_dma_configure() call if no other suitable alternative (e.g. some
intermediate bridge node) exists. How hard are you likely to NAK that
approach? ;)

> dma-ranges is supposed to be a bus property, not a device's property.
> So looking at the parent is correct behavior generally.

Indeed, this patch as-is will break currently correct DTs (because we
won't find dma-ranges on the device, so will bail before even looking at
the parent as we should).

Robin.

> 
> Rob
> 

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-27 14:46   ` Rob Herring
  0 siblings, 0 replies; 52+ messages in thread
From: Rob Herring @ 2017-03-27 14:46 UTC (permalink / raw)
  To: Oza Pawandeep
  Cc: Joerg Roedel, Robin Murphy, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
> It is possible that a PCI device supports 64-bit DMA addressing,
> and thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
> however, the PCI host bridge may have limitations on inbound
> transaction addressing. As an example, consider an NVMe SSD
> connected to the iproc PCIe controller.
>
> Currently, the IOMMU DMA ops only consider the PCI device's dma_mask
> when allocating an IOVA. This is particularly problematic on
> ARM/ARM64 SoCs where the IOMMU (i.e. SMMU) translates IOVA to
> PA for inbound transactions only after the PCI host has forwarded
> these transactions on the SoC IO bus. This means that on such
> ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
> addressing restrictions of the PCI host.
>
> The current PCIe and OF framework integration assumes dma-ranges
> in a way where memory-mapped devices define their dma-ranges:
> dma-ranges: (child-bus-address, parent-bus-address, length).
>
> But iproc-based SoCs, and even R-Car-based SoCs, have PCI-world dma-ranges:
> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;

If you implement a common function, then I expect to see other users
converted to use it. There are also PCI hosts in arch/powerpc that parse
dma-ranges.

Rob

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 3/3] of: fix node traversing in of_dma_get_range
@ 2017-03-28  4:50         ` Oza Oza via iommu
  0 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-28  4:50 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Rob Herring, Joerg Roedel, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

Please find my comments inline.

On Mon, Mar 27, 2017 at 8:15 PM, Robin Murphy <robin.murphy@arm.com> wrote:
> Hi Rob,
>
> On 27/03/17 15:34, Rob Herring wrote:
>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>> It jumps to the parent node without examining the child node,
>>> and with that it throws "no dma-ranges found for node"
>>> for PCI dma-ranges.
>>>
>>> This patch fixes device node traversal for dma-ranges.
>>
>> What's the DT look like that doesn't work?
>
> The problem is the bodge in pci_dma_configure() where we don't have an
> OF node for the actual device itself, so pass in the host bridge's OF
> node instead. This happens to work well enough for dma-coherent, but I
> don't think dma-ranges was even considered at the time.
>
> As it happens I'm currently halfway through writing an experiment
> wherein pci_dma_configure() creates a temporary child node for the
> of_dma_configure() call if no other suitable alternative (e.g. some
> intermediate bridge node) exists. How hard are you likely to NAK that
> approach? ;)
>
>> dma-ranges is supposed to be a bus property, not a device's property.
>> So looking at the parent is correct behavior generally.
>
> Indeed, this patch as-is will break currently correct DTs (because we
> won't find dma-ranges on the device, so will bail before even looking at
> the parent as we should).

The current parsing of dma-ranges assumes that dma-ranges is always
found in the parent node.

Based on that, my thinking is the following: a couple of options.

1)
Instead of while(1), use a meaningful condition such as while(node);
then the following bail-out is not required (see the sketch after this list):
      if (!ranges)
           break;

2)
Add a check based on a DT property to distinguish PCI host bridges
and handle their dma-ranges accordingly.
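
A minimal sketch of option 1 against the of_dma_get_range() walk
(illustration only, not a tested patch; the na/ns bookkeeping is elided):

        const __be32 *ranges = NULL;
        int len;

        /* bound the walk by the node pointer instead of while (1) */
        while (node) {
                node = of_get_next_parent(node);
                if (!node)
                        break;
                ranges = of_get_property(node, "dma-ranges", &len);
                /* an empty property is still non-NULL and means 1:1 */
                if (ranges)
                        break;
                /* note: no bail-out when the property is absent */
        }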

But again, these changes do not solve the entire problem of choosing
the right dma_mask, nor do they correctly address root-bridge PCI
dma-ranges. Hence I have written
https://lkml.org/lkml/2017/3/27/540

My final take is: this function does not need to change; let it parse
memory-mapped dma-ranges as it does today.

I am more inclined to have generic PCI dma-ranges parsing, which the
patch above already addresses.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-28  5:27     ` Oza Oza via iommu
  0 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-28  5:27 UTC (permalink / raw)
  To: Rob Herring
  Cc: Joerg Roedel, Robin Murphy, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>> It is possible that a PCI device supports 64-bit DMA addressing,
>> and thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
>> however, the PCI host bridge may have limitations on inbound
>> transaction addressing. As an example, consider an NVMe SSD
>> connected to the iproc PCIe controller.
>>
>> Currently, the IOMMU DMA ops only consider the PCI device's dma_mask
>> when allocating an IOVA. This is particularly problematic on
>> ARM/ARM64 SoCs where the IOMMU (i.e. SMMU) translates IOVA to
>> PA for inbound transactions only after the PCI host has forwarded
>> these transactions on the SoC IO bus. This means that on such
>> ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
>> addressing restrictions of the PCI host.
>>
>> The current PCIe and OF framework integration assumes dma-ranges
>> in a way where memory-mapped devices define their dma-ranges:
>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>
>> But iproc-based SoCs, and even R-Car-based SoCs, have PCI-world dma-ranges:
>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>
> If you implement a common function, then I expect to see other users
> converted to use it. There are also PCI hosts in arch/powerpc that parse
> dma-ranges.

The common function should be similar to what
of_pci_get_host_bridge_resources is doing right now, which parses the
ranges property.

The new function would look like the following:

of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)

where resources would return the dma-ranges.

But right now, if you see the patch, of_dma_configure calls the new
function, which actually returns the largest possible size. So this new
function has to be generic in a way where other PCI hosts can use it;
certainly iproc (Broadcom SoC) and R-Car based SoCs can use it.

Although having powerpc use it is a separate exercise, since I do not
have access to other PCI hosts such as powerpc; we can work that out
with them on this forum if required.

So overall, of_pci_get_dma_ranges has to serve the following two purposes:

1) it has to return the largest possible size to of_dma_configure, to
generate the largest possible dma_mask (see the worked example after
this list);

2) it also has to return the parsed resources (dma-ranges) to the users.
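
Worked against the iproc example quoted above (a sketch; size is a
power of two here, so the mask is simply size - 1):

        /* dma-ranges = <0x43000000 0x0 0x0  0x0 0x0  0x80 0x0>;
         * the two size cells decode to 0x80_0000_0000 = 2^39 bytes,
         * so the best mask the host bridge can honor is 0x7f_ffff_ffff
         */
        u64 size = 0x8000000000ULL;
        u64 dma_mask = size - 1;        /* == DMA_BIT_MASK(39) */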

So, to address the above needs (a skeleton is sketched below):

of_pci_get_dma_ranges(struct device_node *dev, struct list_head
*resources, u64 *size)

dev -> device node.
resources -> dma-ranges in an allocated list.
size -> highest possible size, to generate the largest possible dma_mask
in of_dma_configure.
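
A skeleton of that prototype (sketch only; the dma-ranges parser-init
helper is hypothetical, named after the existing of_pci_range_parser_init()):

        int of_pci_get_dma_ranges(struct device_node *np,
                                  struct list_head *resources, u64 *size)
        {
                struct of_pci_range_parser parser;
                struct of_pci_range range;
                u64 largest = 0;

                /* hypothetical dma-ranges variant of of_pci_range_parser_init() */
                if (of_pci_dma_range_parser_init(&parser, np))
                        return -EINVAL;

                for_each_of_pci_range(&parser, &range) {
                        /* appending each range to 'resources' elided */
                        if (range.size > largest)
                                largest = range.size;
                }

                *size = largest;
                return 0;
        }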

Let me know how this sounds.

Regards,
Oza.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-28 14:13       ` Rob Herring
  0 siblings, 0 replies; 52+ messages in thread
From: Rob Herring @ 2017-03-28 14:13 UTC (permalink / raw)
  To: Oza Oza
  Cc: Joerg Roedel, Robin Murphy, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Tue, Mar 28, 2017 at 12:27 AM, Oza Oza <oza.oza@broadcom.com> wrote:
> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>> It is possible that a PCI device supports 64-bit DMA addressing,
>>> and thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
>>> however, the PCI host bridge may have limitations on inbound
>>> transaction addressing. As an example, consider an NVMe SSD
>>> connected to the iproc PCIe controller.
>>>
>>> Currently, the IOMMU DMA ops only consider the PCI device's dma_mask
>>> when allocating an IOVA. This is particularly problematic on
>>> ARM/ARM64 SoCs where the IOMMU (i.e. SMMU) translates IOVA to
>>> PA for inbound transactions only after the PCI host has forwarded
>>> these transactions on the SoC IO bus. This means that on such
>>> ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
>>> addressing restrictions of the PCI host.
>>>
>>> The current PCIe and OF framework integration assumes dma-ranges
>>> in a way where memory-mapped devices define their dma-ranges:
>>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>>
>>> But iproc-based SoCs, and even R-Car-based SoCs, have PCI-world dma-ranges:
>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>
>> If you implement a common function, then I expect to see other users
>> converted to use it. There are also PCI hosts in arch/powerpc that parse
>> dma-ranges.
>
> The common function should be similar to what
> of_pci_get_host_bridge_resources is doing right now, which parses the
> ranges property.
>
> The new function would look like the following:
>
> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>
> where resources would return the dma-ranges.
>
> But right now, if you see the patch, of_dma_configure calls the new
> function, which actually returns the largest possible size. So this new
> function has to be generic in a way where other PCI hosts can use it;
> certainly iproc (Broadcom SoC) and R-Car based SoCs can use it.
>
> Although having powerpc use it is a separate exercise, since I do not
> have access to other PCI hosts such as powerpc; we can work that out
> with them on this forum if required.

You don't need h/w. You can analyze what parts are common, write
patches to convert to a common implementation, and build-test. The PPC
and R-Car folks can test on h/w.

Rob

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-28 14:29       ` Robin Murphy
  0 siblings, 0 replies; 52+ messages in thread
From: Robin Murphy @ 2017-03-28 14:29 UTC (permalink / raw)
  To: Oza Oza, Rob Herring
  Cc: Joerg Roedel, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On 28/03/17 06:27, Oza Oza wrote:
> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>> It is possible that a PCI device supports 64-bit DMA addressing,
>>> and thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
>>> however, the PCI host bridge may have limitations on inbound
>>> transaction addressing. As an example, consider an NVMe SSD
>>> connected to the iproc PCIe controller.
>>>
>>> Currently, the IOMMU DMA ops only consider the PCI device's dma_mask
>>> when allocating an IOVA. This is particularly problematic on
>>> ARM/ARM64 SoCs where the IOMMU (i.e. SMMU) translates IOVA to
>>> PA for inbound transactions only after the PCI host has forwarded
>>> these transactions on the SoC IO bus. This means that on such
>>> ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
>>> addressing restrictions of the PCI host.
>>>
>>> The current PCIe and OF framework integration assumes dma-ranges
>>> in a way where memory-mapped devices define their dma-ranges:
>>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>>
>>> But iproc-based SoCs, and even R-Car-based SoCs, have PCI-world dma-ranges:
>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>
>> If you implement a common function, then I expect to see other users
>> converted to use it. There are also PCI hosts in arch/powerpc that parse
>> dma-ranges.
> 
> The common function should be similar to what
> of_pci_get_host_bridge_resources is doing right now, which parses the
> ranges property.
>
> The new function would look like the following:
>
> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>
> where resources would return the dma-ranges.
>
> But right now, if you see the patch, of_dma_configure calls the new
> function, which actually returns the largest possible size. So this new
> function has to be generic in a way where other PCI hosts can use it;
> certainly iproc (Broadcom SoC) and R-Car based SoCs can use it.
>
> Although having powerpc use it is a separate exercise, since I do not
> have access to other PCI hosts such as powerpc; we can work that out
> with them on this forum if required.
>
> So overall, of_pci_get_dma_ranges has to serve the following two purposes:
>
> 1) it has to return the largest possible size to of_dma_configure, to
> generate the largest possible dma_mask;
>
> 2) it also has to return the parsed resources (dma-ranges) to the users.
>
> So, to address the above needs:
>
> of_pci_get_dma_ranges(struct device_node *dev, struct list_head
> *resources, u64 *size)
>
> dev -> device node.
> resources -> dma-ranges in an allocated list.
> size -> highest possible size, to generate the largest possible dma_mask
> in of_dma_configure.
>
> Let me know how this sounds.

Note that the point of passing PCI host bridges into of_dma_configure()
in the first place was to avoid having some separate PCI-specific path
for DMA configuration. I worry that introducing bus-specific dma-ranges
parsing largely defeats that, since we end up with the worst of both
worlds; effectively-duplicated code, and/or a load of extra complexity
to then attempt to reconverge the divergent paths (there really
shouldn't be any need to allocate a list of anything). Given that
of_translate_dma_address() is already bus-agnostic, it hardly seems
justifiable for its caller not to be so as well, especially when it
mostly just comes down to getting the right #address-cells value.
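
(For PCI that mostly means the 3-cell child addresses; a sketch of the
cell counts involved, with illustrative variable names:)

        /* 'child' sits under the PCI host bridge (#address-cells = <3>,
         * #size-cells = <2>); 'parent' is the bridge itself, addressed
         * in the CPU bus's cells
         */
        int na  = of_n_addr_cells(child);   /* 3: PCI address cells */
        int ns  = of_n_size_cells(child);   /* 2: PCI size cells */
        int pna = of_n_addr_cells(parent);  /* parent (CPU) address cells */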

The patch below is actually enough to make typical cases work, but is
vile, so I'm not seriously considering it (hence I've not bothered
making IOMMU configuration handle all circumstances). What it has served
to do, though, is give me a clear idea of how to properly sort out the
not-quite-right device/parent assumptions between of_dma_configure() and
of_dma_get_range() rather than bodging around them any further - stay tuned.

Robin.

----->8-----
From: Robin Murphy <robin.murphy@arm.com>
Subject: [PATCH] of/pci: Use child node for DMA configuration

of_dma_configure() expects to be passed an OF node representing the
device being configured - for PCI devices we currently pass the node of
the appropriate host controller, which sort of works for inherited
properties which may appear at any level, like "dma-coherent", but falls
apart for properties which actually care about specific device-parent
relationships, like "dma-ranges".

Solve this by attempting to find a suitable child node if the PCI
hierarchy is actually represented in DT, and if not then faking one up
as a last resort, to make all of DMA configuration work as expected.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/iommu/of_iommu.c |  3 ++-
 drivers/pci/of.c         | 24 ++++++++++++++++++++++++
 drivers/pci/probe.c      | 14 +++++++++++++-
 include/linux/pci.h      |  3 +++
 4 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 2683e9fc0dcf..35c97b945c15 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -146,7 +146,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 	int idx = 0;

 	if (dev_is_pci(dev))
-		return of_pci_iommu_configure(to_pci_dev(dev), master_np);
+		return of_pci_iommu_configure(to_pci_dev(dev),
+					      of_get_next_parent(master_np));

 	/*
 	 * We don't currently walk up the tree looking for a parent IOMMU.
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index e112da11630e..96eece1c423d 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -88,3 +88,27 @@ struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
 	return NULL;
 #endif
 }
+
+struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev)
+{
+	struct device_node *np;
+	struct pci_bus *bus = dev->bus;
+
+	/* Is the device itself actually described in the DT? */
+	np = of_node_get(dev->dev.of_node);
+	if (np)
+		return np;
+
+	/* Or is some intermediate bridge? That would do... */
+	for (bus = dev->bus; bus->parent; bus = bus->parent) {
+		np = of_node_get(bus->bridge->of_node);
+		if (np)
+			return np;
+	}
+
+	/* Failing that, any child of the same host bridge? */
+	if (bus->bridge->parent)
+		np = of_get_next_child(bus->bridge->parent->of_node, NULL);
+
+	return np;
+}
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index dfc9a2794141..4f7ade64aa3e 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1906,7 +1906,19 @@ static void pci_dma_configure(struct pci_dev *dev)

 	if (IS_ENABLED(CONFIG_OF) &&
 		bridge->parent && bridge->parent->of_node) {
-			of_dma_configure(&dev->dev, bridge->parent->of_node);
+		struct device_node *np, tmp = {0};
+
+		np = pci_dev_get_dma_of_node(dev);
+		if (!np) {
+			np = &tmp;
+			of_node_set_flag(np, OF_DETACHED);
+			of_node_init(np);
+			tmp.parent = bridge->parent->of_node;
+		}
+
+		of_dma_configure(&dev->dev, np);
+		if (np != &tmp)
+			of_node_put(np);
 	} else if (has_acpi_companion(bridge)) {
 		struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
 		enum dev_dma_attr attr = acpi_get_dma_attr(adev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index eb3da1a04e6c..94ecd1817f58 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2086,6 +2086,7 @@ void pci_release_of_node(struct pci_dev *dev);
 void pci_set_bus_of_node(struct pci_bus *bus);
 void pci_release_bus_of_node(struct pci_bus *bus);
 struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
+struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev);

 /* Arch may override this (weak) */
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
@@ -2110,6 +2111,8 @@ static inline struct device_node *
 pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
 static inline struct irq_domain *
 pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
+static inline struct device_node *
+pci_dev_get_dma_of_node(struct pci_dev *dev) { return NULL; }
 #endif  /* CONFIG_OF */

 #ifdef CONFIG_ACPI

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-29  4:43         ` Oza Oza
  0 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-29  4:43 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Rob Herring, Joerg Roedel, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Tue, Mar 28, 2017 at 7:59 PM, Robin Murphy <robin.murphy@arm.com> wrote:
> On 28/03/17 06:27, Oza Oza wrote:
>> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
>>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>>> It is possible that a PCI device supports 64-bit DMA addressing,
>>>> and thus its driver sets the device's dma_mask to DMA_BIT_MASK(64);
>>>> however, the PCI host bridge may have limitations on inbound
>>>> transaction addressing. As an example, consider an NVMe SSD
>>>> connected to the iProc PCIe controller.
>>>>
>>>> Currently, the IOMMU DMA ops only consider the PCI device dma_mask
>>>> when allocating an IOVA. This is particularly problematic on
>>>> ARM/ARM64 SoCs, where the IOMMU (i.e. SMMU) translates IOVA to
>>>> PA for inbound transactions only after the PCI host has forwarded
>>>> these transactions onto the SoC IO bus. This means that on such
>>>> ARM/ARM64 SoCs the IOVA of inbound transactions has to honor the
>>>> addressing restrictions of the PCI host.
>>>>
>>>> The current PCIe framework and OF framework integration assumes
>>>> dma-ranges in the form used by memory-mapped devices:
>>>> (child-bus-address, parent-bus-address, length).
>>>>
>>>> But iProc-based SoCs, and even R-Car-based SoCs, have PCI-world dma-ranges:
>>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>>
>>> If you implement a common function, then I expect to see other users
>>> converted to use it. There's also PCI hosts in arch/powerpc that parse
>>> dma-ranges.
>>
>> The common function should be similar to what
>> of_pci_get_host_bridge_resources() is doing right now:
>> it parses the ranges property.
>>
>> The new function would look like the following:
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>> where resources would return the dma-ranges.
>>
>> But right now, if you look at the patch, of_dma_configure() calls the
>> new function, which actually returns the largest possible size.
>> So this new function has to be generic in a way that other PCI hosts
>> can use it; certainly iProc (Broadcom SoC) and R-Car based SoCs can
>> use it.
>>
>> Although having powerpc use it is a separate exercise, since I do
>> not have access to other PCI hosts such as powerpc, we can work
>> that out with them on this forum if required.
>>
>> So overall, of_pci_get_dma_ranges() has to serve the following two purposes:
>>
>> 1) it has to return the largest possible size, so that of_dma_configure()
>> can generate the largest possible dma_mask.
>>
>> 2) it also has to return the parsed resources (dma-ranges) to its callers.
>>
>> So, to address the above needs:
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head
>> *resources, u64 *size)
>>
>> dev -> device node.
>> resources -> parsed dma-ranges, in an allocated list.
>> size -> highest possible size, to generate the dma_mask in
>> of_dma_configure.
>>
>> Let me know how this sounds.
>
> Note that the point of passing PCI host bridges into of_dma_configure()
> in the first place was to avoid having some separate PCI-specific path
> for DMA configuration. I worry that introducing bus-specific dma-ranges
> parsing largely defeats that, since we end up with the worst of both
> worlds; effectively-duplicated code, and/or a load of extra complexity
> to then attempt to reconverge the divergent paths (there really
> shouldn't be any need to allocate a list of anything). Given that
> of_translate_dma_address() is already bus-agnostic, it hardly seems
> justifiable for its caller not to be so as well, especially when it
> mostly just comes down to getting the right #address-cells value.
>
> The patch below is actually enough to make typical cases work, but is
> vile, so I'm not seriously considering it (hence I've not bothered
> making IOMMU configuration handle all circumstances). What it has served
> to do, though, is give me a clear idea of how to properly sort out the
> not-quite-right device/parent assumptions between of_dma_configure() and
> of_dma_get_range() rather than bodging around them any further - stay tuned.
>
> Robin.
>
> ----->8-----
> From: Robin Murphy <robin.murphy@arm.com>
> Subject: [PATCH] of/pci: Use child node for DMA configuration
>
> of_dma_configure() expects to be passed an OF node representing the
> device being configured - for PCI devices we currently pass the node of
> the appropriate host controller, which sort of works for inherited
> properties which may appear at any level, like "dma-coherent", but falls
> apart for properties which actually care about specific device-parent
> relationships, like "dma-ranges".
>
> Solve this by attempting to find a suitable child node if the PCI
> hierarchy is actually represented in DT, and if not then faking one up
> as a last resort, to make all of DMA configuration work as expected.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/iommu/of_iommu.c |  3 ++-
>  drivers/pci/of.c         | 24 ++++++++++++++++++++++++
>  drivers/pci/probe.c      | 14 +++++++++++++-
>  include/linux/pci.h      |  3 +++
>  4 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 2683e9fc0dcf..35c97b945c15 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -146,7 +146,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
>         int idx = 0;
>
>         if (dev_is_pci(dev))
> -               return of_pci_iommu_configure(to_pci_dev(dev), master_np);
> +               return of_pci_iommu_configure(to_pci_dev(dev),
> +                                             of_get_next_parent(master_np));
>
>         /*
>          * We don't currently walk up the tree looking for a parent IOMMU.
> diff --git a/drivers/pci/of.c b/drivers/pci/of.c
> index e112da11630e..96eece1c423d 100644
> --- a/drivers/pci/of.c
> +++ b/drivers/pci/of.c
> @@ -88,3 +88,27 @@ struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
>         return NULL;
>  #endif
>  }
> +
> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev)
> +{
> +       struct device_node *np;
> +       struct pci_bus *bus = dev->bus;
> +
> +       /* Is the device itself actually described in the DT? */
> +       np = of_node_get(dev->dev.of_node);
> +       if (np)
> +               return np;
> +
> +       /* Or is some intermediate bridge? That would do... */
> +       for (bus = dev->bus; bus->parent; bus = bus->parent) {
> +               np = of_node_get(bus->bridge->of_node);
> +               if (np)
> +                       return np;
> +       }
> +
> +       /* Failing that, any child of the same host bridge? */
> +       if (bus->bridge->parent)
> +               np = of_get_next_child(bus->bridge->parent->of_node, NULL);
> +
> +       return np;
> +}
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index dfc9a2794141..4f7ade64aa3e 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1906,7 +1906,19 @@ static void pci_dma_configure(struct pci_dev *dev)
>
>         if (IS_ENABLED(CONFIG_OF) &&
>                 bridge->parent && bridge->parent->of_node) {
> -                       of_dma_configure(&dev->dev, bridge->parent->of_node);
> +               struct device_node *np, tmp = {0};
> +
> +               np = pci_dev_get_dma_of_node(dev);
> +               if (!np) {
> +                       np = &tmp;
> +                       of_node_set_flag(np, OF_DETACHED);
> +                       of_node_init(np);
> +                       tmp.parent = bridge->parent->of_node;
> +               }
> +
> +               of_dma_configure(&dev->dev, np);
> +               if (np != &tmp)
> +                       of_node_put(np);
>         } else if (has_acpi_companion(bridge)) {
>                 struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
>                 enum dev_dma_attr attr = acpi_get_dma_attr(adev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index eb3da1a04e6c..94ecd1817f58 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -2086,6 +2086,7 @@ void pci_release_of_node(struct pci_dev *dev);
>  void pci_set_bus_of_node(struct pci_bus *bus);
>  void pci_release_bus_of_node(struct pci_bus *bus);
>  struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev);
>
>  /* Arch may override this (weak) */
>  struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
> @@ -2110,6 +2111,8 @@ static inline struct device_node *
>  pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
>  static inline struct irq_domain *
>  pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
> +static inline struct device_node *
> +pci_dev_get_dma_of_node(struct pci_dev *dev) { return NULL; }
>  #endif  /* CONFIG_OF */
>
>  #ifdef CONFIG_ACPI
>

The PCI and memory-mapped device worlds are different. Of course, from
the IOMMU's perspective all masters look the same, but the original
intention of this patch is to solve two problems:

1) expose a generic API for PCI-world clients to configure their
dma-ranges; right now there is none.
2) let the IOMMU code use the same API to derive the dma_mask.
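
A sketch of point 2), using the dma-ranges example quoted above (cell
layout assumed: 3-cell PCI address, 2-cell parent address, 2-cell size):

	dma-ranges = <0x43000000 0x00 0x00  0x00 0x00  0x80 0x00>;
	/*
	 * PCI (child) base  = 0x0
	 * CPU (parent) base = 0x0
	 * size              = 0x80 00000000 (512 GB)
	 * mask              = base + size - 1 = 0x7f ffffffff
	 */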

The implementation tries to handle dma-ranges for both memory-mapped
and PCI devices, which I think is overkill. Besides, we have different
dma-ranges configurations depending on whether or not the IOMMU is
enabled:

#define PCIE_DMA_RANGES dma-ranges = <0x43000000 0x00 0x80000000  0x00 0x80000000  0x00 0x80000000 \
				      0x43000000 0x08 0x00000000  0x08 0x00000000  0x08 0x00000000 \
				      0x43000000 0x80 0x00000000  0x80 0x00000000  0x40 0x00000000>;
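
Decoding the first entry above for clarity (phys.hi interpreted per the
standard OF PCI bus binding):

	/*
	 * 0x43000000      : phys.hi -> 64-bit, prefetchable memory space
	 * 0x00 0x80000000 : PCI (child) address 0x0_80000000
	 * 0x00 0x80000000 : CPU (parent) address 0x0_80000000
	 * 0x00 0x80000000 : size, 2 GB
	 */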


of_dma_get_range() is written with traditional memory-mapped ranges in
mind; it was never meant for PCI dma-ranges.
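
For contrast, the traditional memory-mapped form that of_dma_get_range()
expects looks roughly like this (a sketch assuming #address-cells = <1>
on the child side and <2> on the parent side):

	dma-ranges = <0x40000000  0x0 0x40000000  0x40000000>;
	/*
	 * plain 1 GB window: child bus address 0x40000000 maps to parent
	 * (CPU) address 0x0_40000000; there is no PCI phys.hi cell here
	 */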

Regards,
Oza.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-29  4:43         ` Oza Oza
  0 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-29  4:43 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Rob Herring, Joerg Roedel, Linux IOMMU,
	linux-pci-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	devicetree-u79uwXL29TY76Z2rM5mHXA,
	bcm-kernel-feedback-list-dY08KVG/lbpWk0Htik3J/w

On Tue, Mar 28, 2017 at 7:59 PM, Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org> wrote:
> On 28/03/17 06:27, Oza Oza wrote:
>> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza-dY08KVG/lbpWk0Htik3J/w@public.gmane.org> wrote:
>>>> it is possible that PCI device supports 64-bit DMA addressing,
>>>> and thus it's driver sets device's dma_mask to DMA_BIT_MASK(64),
>>>> however PCI host bridge may have limitations on the inbound
>>>> transaction addressing. As an example, consider NVME SSD device
>>>> connected to iproc-PCIe controller.
>>>>
>>>> Currently, the IOMMU DMA ops only considers PCI device dma_mask
>>>> when allocating an IOVA. This is particularly problematic on
>>>> ARM/ARM64 SOCs where the IOMMU (i.e. SMMU) translates IOVA to
>>>> PA for in-bound transactions only after PCI Host has forwarded
>>>> these transactions on SOC IO bus. This means on such ARM/ARM64
>>>> SOCs the IOVA of in-bound transactions has to honor the addressing
>>>> restrictions of the PCI Host.
>>>>
>>>> current pcie frmework and of framework integration assumes dma-ranges
>>>> in a way where memory-mapped devices define their dma-ranges.
>>>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>>>
>>>> but iproc based SOCs and even Rcar based SOCs has PCI world dma-ranges.
>>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>>
>>> If you implement a common function, then I expect to see other users
>>> converted to use it. There's also PCI hosts in arch/powerpc that parse
>>> dma-ranges.
>>
>> the common function should be similar to what
>> of_pci_get_host_bridge_resources is doing right now.
>> it parses ranges property right now.
>>
>> the new function would look look following.
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>> where resources would return the dma-ranges.
>>
>> but right now if you see the patch, of_dma_configure calls the new
>> function, which actually returns the largest possible size.
>> so this new function has to be generic in a way where other PCI hosts
>> can use it. but certainly iproc(Broadcom SOC) , rcar based SOCs can
>> use it for sure.
>>
>> although having powerpc using it;  is a separate exercise, since I do
>> not have any access to other PCI hosts such as powerpc. but we can
>> workout with them on thsi forum if required.
>>
>> so overall, of_pci_get_dma_ranges has to serve following 2 purposes.
>>
>> 1) it has to return largest possible size to of_dma_configure to
>> generate largest possible dma_mask.
>>
>> 2) it also has to return resources (dma-ranges) parsed, to the users.
>>
>> so to address above needs
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head
>> *resources, u64 *size)
>>
>> dev -> device node.
>> resources -> dma-ranges in allocated list.
>> size -> highest possible size to generate possible dma_mask for
>> of_dma_configure.
>>
>> let em know how this sounds.
>
> Note that the point of passing PCI host bridges into of_dma_configure()
> in the first place was to avoid having some separate PCI-specific path
> for DMA configuration. I worry that introducing bus-specific dma-ranges
> parsing largely defeats that, since we end up with the worst of both
> worlds; effectively-duplicated code, and/or a load of extra complexity
> to then attempt to reconverge the divergent paths (there really
> shouldn't be any need to allocate a list of anything). Given that
> of_translate_dma_address() is already bus-agnostic, it hardly seems
> justifiable for its caller not to be so as well, especially when it
> mostly just comes down to getting the right #address-cells value.
>
> The patch below is actually enough to make typical cases work, but is
> vile, so I'm not seriously considering it (hence I've not bothered
> making IOMMU configuration handle all circumstances). What it has served
> to do, though, is give me a clear idea of how to properly sort out the
> not-quite-right device/parent assumptions between of_dma_configure() and
> of_dma_get_range() rather than bodging around them any further - stay tuned.
>
> Robin.
>
> ----->8-----
> From: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
> Subject: [PATCH] of/pci: Use child node for DMA configuration
>
> of_dma_configure() expects to be passed an OF node representing the
> device being configured - for PCI devices we currently pass the node of
> the appropriate host controller, which sort of works for inherited
> properties which may appear at any level, like "dma-coherent", but falls
> apart for properties which actually care about specific device-parent
> relationships, like "dma-ranges".
>
> Solve this by attempting to find a suitable child node if the PCI
> hierarchy is actually represented in DT, and if not then faking one up
> as a last resort, to make all of DMA configuration work as expected.
>
> Signed-off-by: Robin Murphy <robin.murphy-5wv7dgnIgG8@public.gmane.org>
> ---
>  drivers/iommu/of_iommu.c |  3 ++-
>  drivers/pci/of.c         | 24 ++++++++++++++++++++++++
>  drivers/pci/probe.c      | 14 +++++++++++++-
>  include/linux/pci.h      |  3 +++
>  4 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 2683e9fc0dcf..35c97b945c15 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -146,7 +146,8 @@ const struct iommu_ops *of_iommu_configure(struct
> device *dev,
>         int idx = 0;
>
>         if (dev_is_pci(dev))
> -               return of_pci_iommu_configure(to_pci_dev(dev), master_np);
> +               return of_pci_iommu_configure(to_pci_dev(dev),
> +                                             of_get_next_parent(master_np));
>
>         /*
>          * We don't currently walk up the tree looking for a parent IOMMU.
> diff --git a/drivers/pci/of.c b/drivers/pci/of.c
> index e112da11630e..96eece1c423d 100644
> --- a/drivers/pci/of.c
> +++ b/drivers/pci/of.c
> @@ -88,3 +88,27 @@ struct irq_domain
> *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
>         return NULL;
>  #endif
>  }
> +
> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev)
> +{
> +       struct device_node *np;
> +       struct pci_bus *bus = dev->bus;
> +
> +       /* Is the device itself actually described in the DT? */
> +       np = of_node_get(dev->dev.of_node);
> +       if (np)
> +               return np;
> +
> +       /* Or is some intermediate bridge? That would do... */
> +       for (bus = dev->bus; bus->parent; bus = bus->parent) {
> +               np = of_node_get(bus->bridge->of_node);
> +               if (np)
> +                       return np;
> +       }
> +
> +       /* Failing that, any child of the same host bridge? */
> +       if (bus->bridge->parent)
> +               np = of_get_next_child(bus->bridge->parent->of_node, NULL);
> +
> +       return np;
> +}
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index dfc9a2794141..4f7ade64aa3e 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1906,7 +1906,19 @@ static void pci_dma_configure(struct pci_dev *dev)
>
>         if (IS_ENABLED(CONFIG_OF) &&
>                 bridge->parent && bridge->parent->of_node) {
> -                       of_dma_configure(&dev->dev, bridge->parent->of_node);
> +               struct device_node *np, tmp = {0};
> +
> +               np = pci_dev_get_dma_of_node(dev);
> +               if (!np) {
> +                       np = &tmp;
> +                       of_node_set_flag(np, OF_DETACHED);
> +                       of_node_init(np);
> +                       tmp.parent = bridge->parent->of_node;
> +               }
> +
> +               of_dma_configure(&dev->dev, np);
> +               if (np != &tmp)
> +                       of_node_put(np);
>         } else if (has_acpi_companion(bridge)) {
>                 struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
>                 enum dev_dma_attr attr = acpi_get_dma_attr(adev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index eb3da1a04e6c..94ecd1817f58 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -2086,6 +2086,7 @@ void pci_release_of_node(struct pci_dev *dev);
>  void pci_set_bus_of_node(struct pci_bus *bus);
>  void pci_release_bus_of_node(struct pci_bus *bus);
>  struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev);
>
>  /* Arch may override this (weak) */
>  struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
> @@ -2110,6 +2111,8 @@ static inline struct device_node *
>  pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
>  static inline struct irq_domain *
>  pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
> +static inline struct device_node *
> +pci_dev_get_dma_of_node(struct pci_dev *dev) { return NULL; }
>  #endif  /* CONFIG_OF */
>
>  #ifdef CONFIG_ACPI
>

pci and memory mapped device world is different. of course if you talk
from iommu perspective, all the master are same for IOMMU
but the original intention of the patch is to try to solve 2 problems.

1) expose generic API for pci world clients to configure their
dma-ranges. right now there is none.
2) same API can be used by IOMMU to derive dma_mask.

the implementation tries to handle dma-ranges for both memory mapped
and pci devices, which I think is overkill.
besides we have different configuration of dma-ranges based on iommu
is enabled or not enabled.

 #define PCIE_DMA_RANGES dma-ranges = <0x43000000 0x00 0x80000000 0x00
0x80000000 0x00 0x80000000 \
                                      0x43000000 0x08 0x00000000 0x08
0x00000000 0x08 0x00000000 \
                                      0x43000000 0x80 0x00000000 0x80
0x00000000 0x40 0x00000000>;


the of_dma_get_range is written with respect to traditional memory
ranges, they were not meant for pci dma-ranges.

Regards,
Oza.
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-29  4:43         ` Oza Oza
  0 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-29  4:43 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Rob Herring, Joerg Roedel, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Tue, Mar 28, 2017 at 7:59 PM, Robin Murphy <robin.murphy@arm.com> wrote:
> On 28/03/17 06:27, Oza Oza wrote:
>> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
>>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>>> it is possible that PCI device supports 64-bit DMA addressing,
>>>> and thus it's driver sets device's dma_mask to DMA_BIT_MASK(64),
>>>> however PCI host bridge may have limitations on the inbound
>>>> transaction addressing. As an example, consider NVME SSD device
>>>> connected to iproc-PCIe controller.
>>>>
>>>> Currently, the IOMMU DMA ops only considers PCI device dma_mask
>>>> when allocating an IOVA. This is particularly problematic on
>>>> ARM/ARM64 SOCs where the IOMMU (i.e. SMMU) translates IOVA to
>>>> PA for in-bound transactions only after PCI Host has forwarded
>>>> these transactions on SOC IO bus. This means on such ARM/ARM64
>>>> SOCs the IOVA of in-bound transactions has to honor the addressing
>>>> restrictions of the PCI Host.
>>>>
>>>> current pcie frmework and of framework integration assumes dma-ranges
>>>> in a way where memory-mapped devices define their dma-ranges.
>>>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>>>
>>>> but iproc based SOCs and even Rcar based SOCs has PCI world dma-ranges.
>>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>>
>>> If you implement a common function, then I expect to see other users
>>> converted to use it. There's also PCI hosts in arch/powerpc that parse
>>> dma-ranges.
>>
>> the common function should be similar to what
>> of_pci_get_host_bridge_resources is doing right now.
>> it parses ranges property right now.
>>
>> the new function would look look following.
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>> where resources would return the dma-ranges.
>>
>> but right now if you see the patch, of_dma_configure calls the new
>> function, which actually returns the largest possible size.
>> so this new function has to be generic in a way where other PCI hosts
>> can use it. but certainly iproc(Broadcom SOC) , rcar based SOCs can
>> use it for sure.
>>
>> although having powerpc using it;  is a separate exercise, since I do
>> not have any access to other PCI hosts such as powerpc. but we can
>> workout with them on thsi forum if required.
>>
>> so overall, of_pci_get_dma_ranges has to serve following 2 purposes.
>>
>> 1) it has to return largest possible size to of_dma_configure to
>> generate largest possible dma_mask.
>>
>> 2) it also has to return resources (dma-ranges) parsed, to the users.
>>
>> so to address above needs
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head
>> *resources, u64 *size)
>>
>> dev -> device node.
>> resources -> dma-ranges in allocated list.
>> size -> highest possible size to generate possible dma_mask for
>> of_dma_configure.
>>
>> let em know how this sounds.
>
> Note that the point of passing PCI host bridges into of_dma_configure()
> in the first place was to avoid having some separate PCI-specific path
> for DMA configuration. I worry that introducing bus-specific dma-ranges
> parsing largely defeats that, since we end up with the worst of both
> worlds; effectively-duplicated code, and/or a load of extra complexity
> to then attempt to reconverge the divergent paths (there really
> shouldn't be any need to allocate a list of anything). Given that
> of_translate_dma_address() is already bus-agnostic, it hardly seems
> justifiable for its caller not to be so as well, especially when it
> mostly just comes down to getting the right #address-cells value.
>
> The patch below is actually enough to make typical cases work, but is
> vile, so I'm not seriously considering it (hence I've not bothered
> making IOMMU configuration handle all circumstances). What it has served
> to do, though, is give me a clear idea of how to properly sort out the
> not-quite-right device/parent assumptions between of_dma_configure() and
> of_dma_get_range() rather than bodging around them any further - stay tuned.
>
> Robin.
>
> ----->8-----
> From: Robin Murphy <robin.murphy@arm.com>
> Subject: [PATCH] of/pci: Use child node for DMA configuration
>
> of_dma_configure() expects to be passed an OF node representing the
> device being configured - for PCI devices we currently pass the node of
> the appropriate host controller, which sort of works for inherited
> properties which may appear at any level, like "dma-coherent", but falls
> apart for properties which actually care about specific device-parent
> relationships, like "dma-ranges".
>
> Solve this by attempting to find a suitable child node if the PCI
> hierarchy is actually represented in DT, and if not then faking one up
> as a last resort, to make all of DMA configuration work as expected.
>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
>  drivers/iommu/of_iommu.c |  3 ++-
>  drivers/pci/of.c         | 24 ++++++++++++++++++++++++
>  drivers/pci/probe.c      | 14 +++++++++++++-
>  include/linux/pci.h      |  3 +++
>  4 files changed, 42 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 2683e9fc0dcf..35c97b945c15 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -146,7 +146,8 @@ const struct iommu_ops *of_iommu_configure(struct
> device *dev,
>         int idx = 0;
>
>         if (dev_is_pci(dev))
> -               return of_pci_iommu_configure(to_pci_dev(dev), master_np);
> +               return of_pci_iommu_configure(to_pci_dev(dev),
> +                                             of_get_next_parent(master_np));
>
>         /*
>          * We don't currently walk up the tree looking for a parent IOMMU.
> diff --git a/drivers/pci/of.c b/drivers/pci/of.c
> index e112da11630e..96eece1c423d 100644
> --- a/drivers/pci/of.c
> +++ b/drivers/pci/of.c
> @@ -88,3 +88,27 @@ struct irq_domain
> *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
>         return NULL;
>  #endif
>  }
> +
> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev)
> +{
> +       struct device_node *np;
> +       struct pci_bus *bus = dev->bus;
> +
> +       /* Is the device itself actually described in the DT? */
> +       np = of_node_get(dev->dev.of_node);
> +       if (np)
> +               return np;
> +
> +       /* Or is some intermediate bridge? That would do... */
> +       for (bus = dev->bus; bus->parent; bus = bus->parent) {
> +               np = of_node_get(bus->bridge->of_node);
> +               if (np)
> +                       return np;
> +       }
> +
> +       /* Failing that, any child of the same host bridge? */
> +       if (bus->bridge->parent)
> +               np = of_get_next_child(bus->bridge->parent->of_node, NULL);
> +
> +       return np;
> +}
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index dfc9a2794141..4f7ade64aa3e 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -1906,7 +1906,19 @@ static void pci_dma_configure(struct pci_dev *dev)
>
>         if (IS_ENABLED(CONFIG_OF) &&
>                 bridge->parent && bridge->parent->of_node) {
> -                       of_dma_configure(&dev->dev, bridge->parent->of_node);
> +               struct device_node *np, tmp = {0};
> +
> +               np = pci_dev_get_dma_of_node(dev);
> +               if (!np) {
> +                       np = &tmp;
> +                       of_node_set_flag(np, OF_DETACHED);
> +                       of_node_init(np);
> +                       tmp.parent = bridge->parent->of_node;
> +               }
> +
> +               of_dma_configure(&dev->dev, np);
> +               if (np != &tmp)
> +                       of_node_put(np);
>         } else if (has_acpi_companion(bridge)) {
>                 struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
>                 enum dev_dma_attr attr = acpi_get_dma_attr(adev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index eb3da1a04e6c..94ecd1817f58 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -2086,6 +2086,7 @@ void pci_release_of_node(struct pci_dev *dev);
>  void pci_set_bus_of_node(struct pci_bus *bus);
>  void pci_release_bus_of_node(struct pci_bus *bus);
>  struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev);
>
>  /* Arch may override this (weak) */
>  struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
> @@ -2110,6 +2111,8 @@ static inline struct device_node *
>  pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
>  static inline struct irq_domain *
>  pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
> +static inline struct device_node *
> +pci_dev_get_dma_of_node(struct pci_dev *dev) { return NULL; }
>  #endif  /* CONFIG_OF */
>
>  #ifdef CONFIG_ACPI
>

pci and memory mapped device world is different. of course if you talk
from iommu perspective, all the master are same for IOMMU
but the original intention of the patch is to try to solve 2 problems.

1) expose generic API for pci world clients to configure their
dma-ranges. right now there is none.
2) same API can be used by IOMMU to derive dma_mask.

the implementation tries to handle dma-ranges for both memory mapped
and pci devices, which I think is overkill.
besides we have different configuration of dma-ranges based on iommu
is enabled or not enabled.

 #define PCIE_DMA_RANGES dma-ranges = <0x43000000 0x00 0x80000000 0x00
0x80000000 0x00 0x80000000 \
                                      0x43000000 0x08 0x00000000 0x08
0x00000000 0x08 0x00000000 \
                                      0x43000000 0x80 0x00000000 0x80
0x00000000 0x40 0x00000000>;


the of_dma_get_range is written with respect to traditional memory
ranges, they were not meant for pci dma-ranges.

Regards,
Oza.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
@ 2017-03-30  3:26           ` Oza Oza via iommu
  0 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-30  3:26 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Rob Herring, Joerg Roedel, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Wed, Mar 29, 2017 at 10:13 AM, Oza Oza <oza.oza@broadcom.com> wrote:
> On Tue, Mar 28, 2017 at 7:59 PM, Robin Murphy <robin.murphy@arm.com> wrote:
>> On 28/03/17 06:27, Oza Oza wrote:
>>> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
>>>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>>>> it is possible that PCI device supports 64-bit DMA addressing,
>>>>> and thus it's driver sets device's dma_mask to DMA_BIT_MASK(64),
>>>>> however PCI host bridge may have limitations on the inbound
>>>>> transaction addressing. As an example, consider NVME SSD device
>>>>> connected to iproc-PCIe controller.
>>>>>
>>>>> Currently, the IOMMU DMA ops only considers PCI device dma_mask
>>>>> when allocating an IOVA. This is particularly problematic on
>>>>> ARM/ARM64 SOCs where the IOMMU (i.e. SMMU) translates IOVA to
>>>>> PA for in-bound transactions only after PCI Host has forwarded
>>>>> these transactions on SOC IO bus. This means on such ARM/ARM64
>>>>> SOCs the IOVA of in-bound transactions has to honor the addressing
>>>>> restrictions of the PCI Host.
>>>>>
>>>>> current pcie frmework and of framework integration assumes dma-ranges
>>>>> in a way where memory-mapped devices define their dma-ranges.
>>>>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>>>>
>>>>> but iproc based SOCs and even Rcar based SOCs has PCI world dma-ranges.
>>>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>>>
>>>> If you implement a common function, then I expect to see other users
>>>> converted to use it. There's also PCI hosts in arch/powerpc that parse
>>>> dma-ranges.
>>>
>>> the common function should be similar to what
>>> of_pci_get_host_bridge_resources is doing right now.
>>> it parses ranges property right now.
>>>
>>> the new function would look look following.
>>>
>>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>>> where resources would return the dma-ranges.
>>>
>>> but right now if you see the patch, of_dma_configure calls the new
>>> function, which actually returns the largest possible size.
>>> so this new function has to be generic in a way where other PCI hosts
>>> can use it. but certainly iproc(Broadcom SOC) , rcar based SOCs can
>>> use it for sure.
>>>
>>> although having powerpc using it;  is a separate exercise, since I do
>>> not have any access to other PCI hosts such as powerpc. but we can
>>> workout with them on thsi forum if required.
>>>
>>> so overall, of_pci_get_dma_ranges has to serve following 2 purposes.
>>>
>>> 1) it has to return largest possible size to of_dma_configure to
>>> generate largest possible dma_mask.
>>>
>>> 2) it also has to return resources (dma-ranges) parsed, to the users.
>>>
>>> so to address above needs
>>>
>>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head
>>> *resources, u64 *size)
>>>
>>> dev -> device node.
>>> resources -> dma-ranges in allocated list.
>>> size -> highest possible size to generate possible dma_mask for
>>> of_dma_configure.
>>>
>>> let em know how this sounds.
>>
>> Note that the point of passing PCI host bridges into of_dma_configure()
>> in the first place was to avoid having some separate PCI-specific path
>> for DMA configuration. I worry that introducing bus-specific dma-ranges
>> parsing largely defeats that, since we end up with the worst of both
>> worlds; effectively-duplicated code, and/or a load of extra complexity
>> to then attempt to reconverge the divergent paths (there really
>> shouldn't be any need to allocate a list of anything). Given that
>> of_translate_dma_address() is already bus-agnostic, it hardly seems
>> justifiable for its caller not to be so as well, especially when it
>> mostly just comes down to getting the right #address-cells value.
>>
>> The patch below is actually enough to make typical cases work, but is
>> vile, so I'm not seriously considering it (hence I've not bothered
>> making IOMMU configuration handle all circumstances). What it has served
>> to do, though, is give me a clear idea of how to properly sort out the
>> not-quite-right device/parent assumptions between of_dma_configure() and
>> of_dma_get_range() rather than bodging around them any further - stay tuned.
>>
>> Robin.
>>
>> ----->8-----
>> From: Robin Murphy <robin.murphy@arm.com>
>> Subject: [PATCH] of/pci: Use child node for DMA configuration
>>
>> of_dma_configure() expects to be passed an OF node representing the
>> device being configured - for PCI devices we currently pass the node of
>> the appropriate host controller, which sort of works for inherited
>> properties which may appear at any level, like "dma-coherent", but falls
>> apart for properties which actually care about specific device-parent
>> relationships, like "dma-ranges".
>>
>> Solve this by attempting to find a suitable child node if the PCI
>> hierarchy is actually represented in DT, and if not then faking one up
>> as a last resort, to make all of DMA configuration work as expected.
>>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>>  drivers/iommu/of_iommu.c |  3 ++-
>>  drivers/pci/of.c         | 24 ++++++++++++++++++++++++
>>  drivers/pci/probe.c      | 14 +++++++++++++-
>>  include/linux/pci.h      |  3 +++
>>  4 files changed, 42 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
>> index 2683e9fc0dcf..35c97b945c15 100644
>> --- a/drivers/iommu/of_iommu.c
>> +++ b/drivers/iommu/of_iommu.c
>> @@ -146,7 +146,8 @@ const struct iommu_ops *of_iommu_configure(struct
>> device *dev,
>>         int idx = 0;
>>
>>         if (dev_is_pci(dev))
>> -               return of_pci_iommu_configure(to_pci_dev(dev), master_np);
>> +               return of_pci_iommu_configure(to_pci_dev(dev),
>> +                                             of_get_next_parent(master_np));
>>
>>         /*
>>          * We don't currently walk up the tree looking for a parent IOMMU.
>> diff --git a/drivers/pci/of.c b/drivers/pci/of.c
>> index e112da11630e..96eece1c423d 100644
>> --- a/drivers/pci/of.c
>> +++ b/drivers/pci/of.c
>> @@ -88,3 +88,27 @@ struct irq_domain
>> *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
>>         return NULL;
>>  #endif
>>  }
>> +
>> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev)
>> +{
>> +       struct device_node *np;
>> +       struct pci_bus *bus = dev->bus;
>> +
>> +       /* Is the device itself actually described in the DT? */
>> +       np = of_node_get(dev->dev.of_node);
>> +       if (np)
>> +               return np;
>> +
>> +       /* Or is some intermediate bridge? That would do... */
>> +       for (bus = dev->bus; bus->parent; bus = bus->parent) {
>> +               np = of_node_get(bus->bridge->of_node);
>> +               if (np)
>> +                       return np;
>> +       }
>> +
>> +       /* Failing that, any child of the same host bridge? */
>> +       if (bus->bridge->parent)
>> +               np = of_get_next_child(bus->bridge->parent->of_node, NULL);
>> +
>> +       return np;
>> +}
>> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
>> index dfc9a2794141..4f7ade64aa3e 100644
>> --- a/drivers/pci/probe.c
>> +++ b/drivers/pci/probe.c
>> @@ -1906,7 +1906,19 @@ static void pci_dma_configure(struct pci_dev *dev)
>>
>>         if (IS_ENABLED(CONFIG_OF) &&
>>                 bridge->parent && bridge->parent->of_node) {
>> -                       of_dma_configure(&dev->dev, bridge->parent->of_node);
>> +               struct device_node *np, tmp = {0};
>> +
>> +               np = pci_dev_get_dma_of_node(dev);
>> +               if (!np) {
>> +                       np = &tmp;
>> +                       of_node_set_flag(np, OF_DETACHED);
>> +                       of_node_init(np);
>> +                       tmp.parent = bridge->parent->of_node;
>> +               }
>> +
>> +               of_dma_configure(&dev->dev, np);
>> +               if (np != &tmp)
>> +                       of_node_put(np);
>>         } else if (has_acpi_companion(bridge)) {
>>                 struct acpi_device *adev = to_acpi_device_node(bridge->fwnode);
>>                 enum dev_dma_attr attr = acpi_get_dma_attr(adev);
>> diff --git a/include/linux/pci.h b/include/linux/pci.h
>> index eb3da1a04e6c..94ecd1817f58 100644
>> --- a/include/linux/pci.h
>> +++ b/include/linux/pci.h
>> @@ -2086,6 +2086,7 @@ void pci_release_of_node(struct pci_dev *dev);
>>  void pci_set_bus_of_node(struct pci_bus *bus);
>>  void pci_release_bus_of_node(struct pci_bus *bus);
>>  struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
>> +struct device_node *pci_dev_get_dma_of_node(struct pci_dev *dev);
>>
>>  /* Arch may override this (weak) */
>>  struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
>> @@ -2110,6 +2111,8 @@ static inline struct device_node *
>>  pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
>>  static inline struct irq_domain *
>>  pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
>> +static inline struct device_node *
>> +pci_dev_get_dma_of_node(struct pci_dev *dev) { return NULL; }
>>  #endif  /* CONFIG_OF */
>>
>>  #ifdef CONFIG_ACPI
>>
>
> pci and memory mapped device world is different. of course if you talk
> from iommu perspective, all the master are same for IOMMU
> but the original intention of the patch is to try to solve 2 problems.
>
> 1) expose generic API for pci world clients to configure their
> dma-ranges. right now there is none.
> 2) same API can be used by IOMMU to derive dma_mask.
>
> the implementation tries to handle dma-ranges for both memory mapped
> and pci devices, which I think is overkill.
> besides we have different configuration of dma-ranges based on iommu
> is enabled or not enabled.
>
>  #define PCIE_DMA_RANGES dma-ranges = <0x43000000 0x00 0x80000000 0x00
> 0x80000000 0x00 0x80000000 \
>                                       0x43000000 0x08 0x00000000 0x08
> 0x00000000 0x08 0x00000000 \
>                                       0x43000000 0x80 0x00000000 0x80
> 0x00000000 0x40 0x00000000>;
>
>
> the of_dma_get_range is written with respect to traditional memory
> ranges, they were not meant for pci dma-ranges.
>
> Regards,
> Oza.


Rob, a gentle reminder for your viewpoint. My take on the whole thing is
as follows:

1) of_dma_get_range() is supposed to handle traditional dma-ranges
(not the PCI ones).

2) We need a separate function for PCI users such as iProc- or R-Car-based
SoCs to have their dma-ranges parsed; it can be extended later, e.g. if
the PCI flags cell acquires some meaning.

Besides, of_dma_configure() is written to pass out only a single range
(..., *dma_addr, *size); handling multiple ranges there while returning
just one is not desirable (see the sketch below for one way to collapse
them).

Also, you would not want of_dma_get_range() to have to handle the PCI
flags (the first cell of each DT entry) in this function if that is
required in the future.

The new function will be similar to of_pci_get_host_bridge_resources(),
which I am writing/improving right now.
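
To illustrate the collapsing point, here is a minimal sketch (mine, not
the posted patch; the helper name is hypothetical and error handling is
trimmed) of folding multiple PCI dma-ranges entries into the single
(dma_addr, size) pair that of_dma_configure() expects:

	static int of_pci_dma_ranges_span(struct device_node *np,
					  u64 *dma_addr, u64 *size)
	{
		const int na = 3, ns = 2;	/* PCI: 3 addr cells, 2 size cells */
		int pna = of_n_addr_cells(np);	/* parent #address-cells */
		int stride = na + pna + ns;
		u64 start = ~0ULL, end = 0;
		const __be32 *prop;
		int len, i;

		prop = of_get_property(np, "dma-ranges", &len);
		if (!prop)
			return -ENODEV;

		len /= sizeof(__be32);
		for (i = 0; i + stride <= len; i += stride) {
			/* cell 0 is the PCI flags word; cells 1-2 are the address */
			u64 pci_addr = of_read_number(prop + i + 1, na - 1);
			u64 sz = of_read_number(prop + i + na + pna, ns);

			start = min(start, pci_addr);
			end = max(end, pci_addr + sz);
		}

		*dma_addr = start;
		*size = end - start;
		return 0;
	}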

Regards,
Oza.

^ permalink raw reply	[flat|nested] 52+ messages in thread

* Re: [RFC PATCH 1/3] of/pci: dma-ranges to account highest possible host bridge dma_mask
  2017-03-28 14:13       ` Rob Herring
@ 2017-03-30 10:14         ` Oza Oza
  -1 siblings, 0 replies; 52+ messages in thread
From: Oza Oza @ 2017-03-30 10:14 UTC (permalink / raw)
  To: Rob Herring
  Cc: Joerg Roedel, Robin Murphy, Linux IOMMU, linux-pci, linux-kernel,
	linux-arm-kernel, devicetree, bcm-kernel-feedback-list

On Tue, Mar 28, 2017 at 7:43 PM, Rob Herring <robh@kernel.org> wrote:
> On Tue, Mar 28, 2017 at 12:27 AM, Oza Oza <oza.oza@broadcom.com> wrote:
>> On Mon, Mar 27, 2017 at 8:16 PM, Rob Herring <robh@kernel.org> wrote:
>>> On Sat, Mar 25, 2017 at 12:31 AM, Oza Pawandeep <oza.oza@broadcom.com> wrote:
>>>> It is possible that a PCI device supports 64-bit DMA addressing,
>>>> and thus its driver sets the device's dma_mask to DMA_BIT_MASK(64),
>>>> while the PCI host bridge has limitations on inbound transaction
>>>> addressing. As an example, consider an NVMe SSD connected to an
>>>> iproc PCIe controller.
>>>>
>>>> Currently, the IOMMU DMA ops only consider the PCI device dma_mask
>>>> when allocating an IOVA. This is particularly problematic on
>>>> ARM/ARM64 SOCs where the IOMMU (i.e. SMMU) translates IOVA to
>>>> PA for inbound transactions only after the PCI host has forwarded
>>>> these transactions onto the SOC IO bus. This means that on such
>>>> ARM/ARM64 SOCs the IOVA of inbound transactions has to honor the
>>>> addressing restrictions of the PCI host.
>>>>
>>>> The current PCIe and OF framework integration assumes dma-ranges
>>>> in the form memory-mapped devices use to define them:
>>>> dma-ranges: (child-bus-address, parent-bus-address, length).
>>>>
>>>> But iproc-based SOCs, and even Rcar-based SOCs, have PCI-world
>>>> dma-ranges:
>>>> dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
>>>
>>> If you implement a common function, then I expect to see other users
>>> converted to use it. There's also PCI hosts in arch/powerpc that parse
>>> dma-ranges.
>>
>> The common function should be similar to what
>> of_pci_get_host_bridge_resources does today; that one parses the
>> "ranges" property. The new function would look like the following:
>>
>> of_pci_get_dma_ranges(struct device_node *dev, struct list_head *resources)
>> where "resources" would return the dma-ranges.
>>
>> But right now, if you look at the patch, of_dma_configure calls the
>> new function, which actually returns the largest possible size. So
>> this new function has to be generic enough for other PCI hosts to
>> use; certainly iproc (Broadcom SOC) and rcar-based SOCs can use it.
>>
>> Having powerpc use it is a separate exercise, though, since I do not
>> have access to other PCI hosts such as powerpc; but we can work that
>> out with them on this forum if required.
>
> You don't need h/w. You can analyze what parts are common, write
> patches to convert to common implementation, and build test. The PPC
> and rcar folks can test on h/w.
>
> Rob


Hi Rob,

I have addressed your comment and made the function generic.
A gentle request to have a look at the following approach and patches:

[RFC PATCH 2/2] of/pci: call pci specific dma-ranges instead of memory-mapped.
[RFC PATCH 1/2] of/pci: implement inbound dma-ranges for PCI

I have tested this on our platform, with and without an IOMMU, and it
seems to work.

Let me know your view on this.

Regards,
Oza.

^ permalink raw reply	[flat|nested] 52+ messages in thread

