* [PATCH v4 1/3] of/pci/dma: fix DMA configuration for PCI masters
2017-05-05 13:47 [PATCH v4 0/3] OF/PCI address PCI inbound memory limitations Oza Pawandeep
@ 2017-05-05 13:47 ` Oza Pawandeep
2017-05-05 13:47 ` [PATCH v4 2/3] iommu/pci: reserve IOVA " Oza Pawandeep
2017-05-05 13:47 ` [PATCH v4 3/3] PCI/of fix of_dma_get_range; get PCI specific dma-ranges Oza Pawandeep
2 siblings, 0 replies; 5+ messages in thread
From: Oza Pawandeep @ 2017-05-05 13:47 UTC (permalink / raw)
To: Joerg Roedel, Robin Murphy
Cc: iommu, linux-pci, linux-kernel, linux-arm-kernel, devicetree,
bcm-kernel-feedback-list, Oza Pawandeep, Oza Pawandeep
current device framework and OF framework integration assumes
dma-ranges in a way where memory-mapped devices define their
dma-ranges. (child-bus-address, parent-bus-address, length).
of_dma_configure is specifically written to take care of memory
mapped devices. but no implementation exists for pci to take
care of pcie based memory ranges.
for e.g. iproc based SOCs and other SOCs (such as rcar) have PCI
world dma-ranges.
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
this patch serves following:
1) exposes interface to the pci host driver for their
inbound memory ranges
2) provide an interface to callers such as of_dma_get_ranges.
so that the returned size gets the best possible (largest) dma_mask,
because PCI RC drivers do not call APIs such as
dma_set_coherent_mask() and instead advertise their addressing
capabilities based on dma-ranges.
for e.g.
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
we should get dev->coherent_dma_mask=0x7fffffffff.
3) this patch handles multiple inbound windows and dma-ranges.
it is left to the caller, how it wants to use them.
the new function returns the resources in a standard and uniform way
4) this way the callers of for e.g. of_dma_get_ranges
does not need to change.
5) leaves scope of adding PCI flag handling for inbound memory
by the new function.
Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 0ee42c3..ed6e69a 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -283,6 +283,83 @@ int of_pci_get_host_bridge_resources(struct device_node *dev,
return err;
}
EXPORT_SYMBOL_GPL(of_pci_get_host_bridge_resources);
+
+/**
+ * of_pci_get_dma_ranges - Parse PCI host bridge inbound resources from DT
+ * @np: device node of the host bridge having the dma-ranges property
+ * @resources: list where the range of resources will be added after DT parsing
+ *
+ * It is the caller's job to free the @resources list.
+ *
+ * This function will parse the "dma-ranges" property of a
+ * PCI host bridge device node and setup the resource mapping based
+ * on its content.
+ *
+ * It returns zero if the range parsing has been successful or a standard error
+ * value if it failed.
+ */
+
+int of_pci_get_dma_ranges(struct device_node *np, struct list_head *resources)
+{
+ struct device_node *node = of_node_get(np);
+ int rlen;
+ int ret = 0;
+ const int na = 3, ns = 2;
+ struct resource *res;
+ struct of_pci_range_parser parser;
+ struct of_pci_range range;
+
+ if (!node)
+ return -EINVAL;
+
+ parser.node = node;
+ parser.pna = of_n_addr_cells(node);
+ parser.np = parser.pna + na + ns;
+
+ parser.range = of_get_property(node, "dma-ranges", &rlen);
+
+ if (!parser.range) {
+ pr_debug("pcie device has no dma-ranges defined for node(%s)\n",
+ np->full_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ parser.end = parser.range + rlen / sizeof(__be32);
+
+ for_each_of_pci_range(&parser, &range) {
+ /*
+ * If we failed translation or got a zero-sized region
+ * then skip this range
+ */
+ if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
+ continue;
+
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ if (!res) {
+ ret = -ENOMEM;
+ goto parse_failed;
+ }
+
+ ret = of_pci_range_to_resource(&range, np, res);
+ if (ret) {
+ kfree(res);
+ continue;
+ }
+
+ pci_add_resource_offset(resources, res,
+ res->start - range.pci_addr);
+ }
+
+ return ret;
+
+parse_failed:
+ pci_free_resource_list(resources);
+out:
+ of_node_put(node);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_dma_ranges);
#endif /* CONFIG_OF_ADDRESS */
#ifdef CONFIG_PCI_MSI
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 0e0974e..617b90d 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -76,6 +76,7 @@ static inline void of_pci_check_probe_only(void) { }
int of_pci_get_host_bridge_resources(struct device_node *dev,
unsigned char busno, unsigned char bus_max,
struct list_head *resources, resource_size_t *io_base);
+int of_pci_get_dma_ranges(struct device_node *np, struct list_head *resources);
#else
static inline int of_pci_get_host_bridge_resources(struct device_node *dev,
unsigned char busno, unsigned char bus_max,
@@ -83,6 +84,12 @@ static inline int of_pci_get_host_bridge_resources(struct device_node *dev,
{
return -EINVAL;
}
+
+static inline int of_pci_get_dma_ranges(struct device_node *np,
+ struct list_head *resources)
+{
+ return -EINVAL;
+}
#endif
#if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI)
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v4 2/3] iommu/pci: reserve IOVA for PCI masters
2017-05-05 13:47 [PATCH v4 0/3] OF/PCI address PCI inbound memory limitations Oza Pawandeep
2017-05-05 13:47 ` [PATCH v4 1/3] of/pci/dma: fix DMA configuration for PCI masters Oza Pawandeep
@ 2017-05-05 13:47 ` Oza Pawandeep
2017-05-05 13:47 ` [PATCH v4 3/3] PCI/of fix of_dma_get_range; get PCI specific dma-ranges Oza Pawandeep
2 siblings, 0 replies; 5+ messages in thread
From: Oza Pawandeep @ 2017-05-05 13:47 UTC (permalink / raw)
To: Joerg Roedel, Robin Murphy
Cc: iommu, linux-pci, linux-kernel, linux-arm-kernel, devicetree,
bcm-kernel-feedback-list, Oza Pawandeep, Oza Pawandeep
this patch reserves the IOVA for PCI masters.
ARM64 based SOCs may have scattered memory banks.
such as iproc based SOC has
<0x00000000 0x80000000 0x0 0x80000000>, /* 2G @ 2G */
<0x00000008 0x80000000 0x3 0x80000000>, /* 14G @ 34G */
<0x00000090 0x00000000 0x4 0x00000000>, /* 16G @ 576G */
<0x000000a0 0x00000000 0x4 0x00000000>; /* 16G @ 640G */
but incoming PCI transaction addressing capability is limited
by host bridge, for example if max incoming window capability
is 512 GB, then 0x00000090 and 0x000000a0 will fall beyond it.
to address this problem, the iommu has to avoid allocating IOVAs which
are reserved, which in turn means no IOVA is allocated if it falls into a hole.
Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 48d36ce..08764b0 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -27,6 +27,7 @@
#include <linux/iova.h>
#include <linux/irq.h>
#include <linux/mm.h>
+#include <linux/of_pci.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
@@ -171,8 +172,12 @@ static void iova_reserve_pci_windows(struct pci_dev *dev,
struct iova_domain *iovad)
{
struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+ struct device_node *np = bridge->dev.parent->of_node;
struct resource_entry *window;
unsigned long lo, hi;
+ int ret;
+ dma_addr_t tmp_dma_addr = 0, dma_addr;
+ LIST_HEAD(res);
resource_list_for_each_entry(window, &bridge->windows) {
if (resource_type(window->res) != IORESOURCE_MEM &&
@@ -183,6 +188,36 @@ static void iova_reserve_pci_windows(struct pci_dev *dev,
hi = iova_pfn(iovad, window->res->end - window->offset);
reserve_iova(iovad, lo, hi);
}
+
+ /* PCI inbound memory reservation. */
+ ret = of_pci_get_dma_ranges(np, &res);
+ if (!ret) {
+ resource_list_for_each_entry(window, &res) {
+ struct resource *res_dma = window->res;
+
+ dma_addr = res_dma->start - window->offset;
+ if (tmp_dma_addr > dma_addr) {
+ pr_warn("PCI: failed to reserve iovas; ranges should be sorted\n");
+ return;
+ }
+ if (tmp_dma_addr != dma_addr) {
+ lo = iova_pfn(iovad, tmp_dma_addr);
+ hi = iova_pfn(iovad, dma_addr - 1);
+ reserve_iova(iovad, lo, hi);
+ }
+ tmp_dma_addr = window->res->end - window->offset;
+ }
+ /*
+ * the last dma-range should honour based on the
+ * 32/64-bit dma addresses.
+ */
+ if (tmp_dma_addr < DMA_BIT_MASK(sizeof(dma_addr_t) * 8)) {
+ lo = iova_pfn(iovad, tmp_dma_addr);
+ hi = iova_pfn(iovad,
+ DMA_BIT_MASK(sizeof(dma_addr_t) * 8) - 1);
+ reserve_iova(iovad, lo, hi);
+ }
+ }
}
/**
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v4 3/3] PCI/of fix of_dma_get_range; get PCI specific dma-ranges
2017-05-05 13:47 [PATCH v4 0/3] OF/PCI address PCI inbound memory limitations Oza Pawandeep
2017-05-05 13:47 ` [PATCH v4 1/3] of/pci/dma: fix DMA configuration for PCI masters Oza Pawandeep
2017-05-05 13:47 ` [PATCH v4 2/3] iommu/pci: reserve IOVA " Oza Pawandeep
@ 2017-05-05 13:47 ` Oza Pawandeep
2017-05-07 6:43 ` kbuild test robot
2 siblings, 1 reply; 5+ messages in thread
From: Oza Pawandeep @ 2017-05-05 13:47 UTC (permalink / raw)
To: Joerg Roedel, Robin Murphy
Cc: iommu, linux-pci, linux-kernel, linux-arm-kernel, devicetree,
bcm-kernel-feedback-list, Oza Pawandeep, Oza Pawandeep
current device framework and OF framework integration assumes
dma-ranges in a way where memory-mapped devices define their
dma-ranges. (child-bus-address, parent-bus-address, length).
of_dma_configure is specifically written to take care of memory
mapped devices. but no implementation exists for pci to take
care of pcie based memory ranges.
for e.g. iproc based SOCs and other SOCs (such as rcar) have PCI
world dma-ranges.
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
this patch fixes the bug in of_dma_get_range which, as is,
parses the PCI memory ranges and returns a wrong size of 0.
in order to get the largest possible dma_mask, this patch also
returns the largest possible size based on dma-ranges,
for e.g.
dma-ranges = <0x43000000 0x00 0x00 0x00 0x00 0x80 0x00>;
we should get dev->coherent_dma_mask=0x7fffffffff.
based on which IOVA allocation space will honour PCI host
bridge limitations.
the implementation hooks bus specific callbacks for getting
dma-ranges.
Signed-off-by: Oza Pawandeep <oza.oza@broadcom.com>
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 02b2903..b43e347 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -6,6 +6,7 @@
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/of_address.h>
+#include <linux/of_pci.h>
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/sizes.h>
@@ -46,6 +47,8 @@ struct of_bus {
int na, int ns, int pna);
int (*translate)(__be32 *addr, u64 offset, int na);
unsigned int (*get_flags)(const __be32 *addr);
+ int (*get_dma_ranges)(struct device_node *np,
+ u64 *dma_addr, u64 *paddr, u64 *size);
};
/*
@@ -171,6 +174,144 @@ static int of_bus_pci_translate(__be32 *addr, u64 offset, int na)
{
return of_bus_default_translate(addr + 1, offset, na - 1);
}
+
+static int of_bus_pci_get_dma_ranges(struct device_node *np, u64 *dma_addr,
+ u64 *paddr, u64 *size)
+{
+ struct device_node *node = of_node_get(np);
+ int ret = 0;
+ struct resource_entry *window;
+ LIST_HEAD(res);
+
+ if (!node)
+ return -EINVAL;
+
+ *size = 0;
+ /*
+ * PCI dma-ranges is not mandatory property.
+ * many devices do no need to have it, since
+ * host bridge does not require inbound memory
+ * configuration or rather have design limitations.
+ * so we look for dma-ranges, if missing we
+ * just return the caller full size, and also
+ * no dma-ranges suggests that, host bridge allows
+ * whatever comes in, so we set dma_addr to 0.
+ */
+ ret = of_pci_get_dma_ranges(np, &res);
+ if (!ret) {
+ resource_list_for_each_entry(window, &res) {
+ struct resource *res_dma = window->res;
+
+ if (*size < resource_size(res_dma)) {
+ *dma_addr = res_dma->start - window->offset;
+ *paddr = res_dma->start;
+ *size = resource_size(res_dma);
+ }
+ }
+ }
+ pci_free_resource_list(&res);
+
+ /*
+ * return the largest possible size,
+ * since PCI master allows everything.
+ */
+ if (*size == 0) {
+ pr_debug("empty/zero size dma-ranges found for node(%s)\n",
+ np->full_name);
+ *size = DMA_BIT_MASK(sizeof(dma_addr_t) * 8) - 1;
+ *dma_addr = *paddr = 0;
+ ret = 0;
+ }
+
+ pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
+ *dma_addr, *paddr, *size);
+
+ of_node_put(node);
+
+ return ret;
+}
+
+static int get_dma_ranges(struct device_node *np, u64 *dma_addr,
+ u64 *paddr, u64 *size)
+{
+ struct device_node *node = of_node_get(np);
+ const __be32 *ranges = NULL;
+ int len, naddr, nsize, pna;
+ int ret = 0;
+ u64 dmaaddr;
+
+ if (!node)
+ return -EINVAL;
+
+ while (1) {
+ naddr = of_n_addr_cells(node);
+ nsize = of_n_size_cells(node);
+ node = of_get_next_parent(node);
+ if (!node)
+ break;
+
+ ranges = of_get_property(node, "dma-ranges", &len);
+
+ /* Ignore empty ranges, they imply no translation required */
+ if (ranges && len > 0)
+ break;
+
+ /*
+ * At least empty ranges has to be defined for parent node if
+ * DMA is supported
+ */
+ if (!ranges)
+ break;
+ }
+
+ if (!ranges) {
+ pr_debug("no dma-ranges found for node(%s)\n", np->full_name);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ len /= sizeof(u32);
+
+ pna = of_n_addr_cells(node);
+
+ /* dma-ranges format:
+ * DMA addr : naddr cells
+ * CPU addr : pna cells
+ * size : nsize cells
+ */
+ dmaaddr = of_read_number(ranges, naddr);
+ *paddr = of_translate_dma_address(np, ranges);
+ if (*paddr == OF_BAD_ADDR) {
+ pr_err("translation of DMA address(%pad) to CPU address failed node(%s)\n",
+ dma_addr, np->full_name);
+ ret = -EINVAL;
+ goto out;
+ }
+ *dma_addr = dmaaddr;
+
+ *size = of_read_number(ranges + naddr + pna, nsize);
+
+ pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
+ *dma_addr, *paddr, *size);
+
+out:
+ of_node_put(node);
+
+ return ret;
+}
+
+static int of_bus_isa_get_dma_ranges(struct device_node *np, u64 *dma_addr,
+ u64 *paddr, u64 *size)
+{
+ return get_dma_ranges(np, dma_addr, paddr, size);
+}
+
+static int of_bus_default_get_dma_ranges(struct device_node *np, u64 *dma_addr,
+ u64 *paddr, u64 *size)
+{
+ return get_dma_ranges(np, dma_addr, paddr, size);
+}
+
#endif /* CONFIG_OF_ADDRESS_PCI */
#ifdef CONFIG_PCI
@@ -424,6 +565,7 @@ static unsigned int of_bus_isa_get_flags(const __be32 *addr)
.map = of_bus_pci_map,
.translate = of_bus_pci_translate,
.get_flags = of_bus_pci_get_flags,
+ .get_dma_ranges = of_bus_pci_get_dma_ranges,
},
#endif /* CONFIG_OF_ADDRESS_PCI */
/* ISA */
@@ -435,6 +577,7 @@ static unsigned int of_bus_isa_get_flags(const __be32 *addr)
.map = of_bus_isa_map,
.translate = of_bus_isa_translate,
.get_flags = of_bus_isa_get_flags,
+ .get_dma_ranges = of_bus_isa_get_dma_ranges,
},
/* Default */
{
@@ -445,6 +588,7 @@ static unsigned int of_bus_isa_get_flags(const __be32 *addr)
.map = of_bus_default_map,
.translate = of_bus_default_translate,
.get_flags = of_bus_default_get_flags,
+ .get_dma_ranges = of_bus_default_get_dma_ranges,
},
};
@@ -820,74 +964,20 @@ void __iomem *of_io_request_and_map(struct device_node *np, int index,
* size : nsize cells
*
* It returns -ENODEV if "dma-ranges" property was not found
- * for this device in DT.
+ * for this device in DT, except if PCI device then, dma-ranges
+ * can be optional property, and in that case returns size with
+ * entire host memory.
*/
int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size)
{
- struct device_node *node = of_node_get(np);
- const __be32 *ranges = NULL;
- int len, naddr, nsize, pna;
- int ret = 0;
- u64 dmaaddr;
-
- if (!node)
- return -EINVAL;
-
- while (1) {
- naddr = of_n_addr_cells(node);
- nsize = of_n_size_cells(node);
- node = of_get_next_parent(node);
- if (!node)
- break;
-
- ranges = of_get_property(node, "dma-ranges", &len);
-
- /* Ignore empty ranges, they imply no translation required */
- if (ranges && len > 0)
- break;
-
- /*
- * At least empty ranges has to be defined for parent node if
- * DMA is supported
- */
- if (!ranges)
- break;
- }
-
- if (!ranges) {
- pr_debug("no dma-ranges found for node(%s)\n", np->full_name);
- ret = -ENODEV;
- goto out;
- }
-
- len /= sizeof(u32);
-
- pna = of_n_addr_cells(node);
-
- /* dma-ranges format:
- * DMA addr : naddr cells
- * CPU addr : pna cells
- * size : nsize cells
- */
- dmaaddr = of_read_number(ranges, naddr);
- *paddr = of_translate_dma_address(np, ranges);
- if (*paddr == OF_BAD_ADDR) {
- pr_err("translation of DMA address(%pad) to CPU address failed node(%s)\n",
- dma_addr, np->full_name);
- ret = -EINVAL;
- goto out;
- }
- *dma_addr = dmaaddr;
-
- *size = of_read_number(ranges + naddr + pna, nsize);
-
- pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
- *dma_addr, *paddr, *size);
+ struct of_bus *bus;
-out:
- of_node_put(node);
+ /* get bus specific dma-ranges. */
+ bus = of_match_bus(np);
+ if (bus->get_dma_ranges)
+ return bus->get_dma_ranges(np, dma_addr, paddr, size);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(of_dma_get_range);
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread