* [PATCH 2/7] iommu: apple: Implement DMA mapping operations for Apple DART
2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
@ 2023-01-17 22:03 ` Mark Kettenis
2023-01-17 22:04 ` [PATCH 3/7] usb: xhci: Implement DMA mapping Mark Kettenis
` (4 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:03 UTC (permalink / raw)
To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis
Implement translation table support for all the variations of
Apple's DART IOMMU that can be found on Apple's M1 and M2 SoCs.
Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
drivers/iommu/apple_dart.c | 311 +++++++++++++++++++++++++++++++++----
1 file changed, 277 insertions(+), 34 deletions(-)
diff --git a/drivers/iommu/apple_dart.c b/drivers/iommu/apple_dart.c
index 2faacb8f3b..6ecd84303b 100644
--- a/drivers/iommu/apple_dart.c
+++ b/drivers/iommu/apple_dart.c
@@ -6,57 +6,296 @@
#include <common.h>
#include <cpu_func.h>
#include <dm.h>
+#include <iommu.h>
+#include <lmb.h>
+#include <memalign.h>
#include <asm/io.h>
#define DART_PARAMS2 0x0004
#define DART_PARAMS2_BYPASS_SUPPORT BIT(0)
-#define DART_TLB_OP 0x0020
-#define DART_TLB_OP_OPMASK (0xfff << 20)
-#define DART_TLB_OP_FLUSH (0x001 << 20)
-#define DART_TLB_OP_BUSY BIT(2)
-#define DART_TLB_OP_SIDMASK 0x0034
-#define DART_ERROR_STATUS 0x0040
-#define DART_TCR(sid) (0x0100 + 4 * (sid))
-#define DART_TCR_TRANSLATE_ENABLE BIT(7)
-#define DART_TCR_BYPASS_DART BIT(8)
-#define DART_TCR_BYPASS_DAPF BIT(12)
-#define DART_TTBR(sid, idx) (0x0200 + 16 * (sid) + 4 * (idx))
-#define DART_TTBR_VALID BIT(31)
-#define DART_TTBR_SHIFT 12
-#define DART_T8110_TCR(sid) (0x1000 + 4 * (sid))
+#define DART_T8020_TLB_CMD 0x0020
+#define DART_T8020_TLB_CMD_FLUSH BIT(20)
+#define DART_T8020_TLB_CMD_BUSY BIT(2)
+#define DART_T8020_TLB_SIDMASK 0x0034
+#define DART_T8020_ERROR 0x0040
+#define DART_T8020_ERROR_ADDR_LO 0x0050
+#define DART_T8020_ERROR_ADDR_HI 0x0054
+#define DART_T8020_CONFIG 0x0060
+#define DART_T8020_CONFIG_LOCK BIT(15)
+#define DART_T8020_SID_ENABLE 0x00fc
+#define DART_T8020_TCR_BASE 0x0100
+#define DART_T8020_TCR_TRANSLATE_ENABLE BIT(7)
+#define DART_T8020_TCR_BYPASS_DART BIT(8)
+#define DART_T8020_TCR_BYPASS_DAPF BIT(12)
+#define DART_T8020_TTBR_BASE 0x0200
+#define DART_T8020_TTBR_VALID BIT(31)
+
+#define DART_T8110_PARAMS4 0x000c
+#define DART_T8110_PARAMS4_NSID_MASK (0x1ff << 0)
+#define DART_T8110_TLB_CMD 0x0080
+#define DART_T8110_TLB_CMD_BUSY BIT(31)
+#define DART_T8110_TLB_CMD_FLUSH_ALL BIT(8)
+#define DART_T8110_ERROR 0x0100
+#define DART_T8110_ERROR_MASK 0x0104
+#define DART_T8110_ERROR_ADDR_LO 0x0170
+#define DART_T8110_ERROR_ADDR_HI 0x0174
+#define DART_T8110_PROTECT 0x0200
+#define DART_T8110_PROTECT_TTBR_TCR BIT(0)
+#define DART_T8110_SID_ENABLE_BASE 0x0c00
+#define DART_T8110_TCR_BASE 0x1000
#define DART_T8110_TCR_BYPASS_DAPF BIT(2)
#define DART_T8110_TCR_BYPASS_DART BIT(1)
#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0)
-#define DART_T8110_TTBR(sid) (0x1400 + 4 * (sid))
+#define DART_T8110_TTBR_BASE 0x1400
+#define DART_T8110_TTBR_VALID BIT(0)
+
+#define DART_SID_ENABLE(priv, idx) \
+ ((priv)->sid_enable_base + 4 * (idx))
+#define DART_TCR(priv, sid) ((priv)->tcr_base + 4 * (sid))
+#define DART_TTBR(priv, sid, idx) \
+ ((priv)->ttbr_base + 4 * (priv)->nttbr * (sid) + 4 * (idx))
+#define DART_TTBR_SHIFT 12
+
+#define DART_ALL_STREAMS(priv) ((1U << (priv)->nsid) - 1)
+
+#define DART_PAGE_SIZE SZ_16K
+#define DART_PAGE_MASK (DART_PAGE_SIZE - 1)
+
+#define DART_L1_TABLE 0x3
+#define DART_L2_INVAL 0
+#define DART_L2_VALID BIT(0)
+#define DART_L2_FULL_PAGE BIT(1)
+#define DART_L2_START(addr) ((((addr) & DART_PAGE_MASK) >> 2) << 52)
+#define DART_L2_END(addr) ((((addr) & DART_PAGE_MASK) >> 2) << 40)
+
+struct apple_dart_priv {
+ void *base;
+ struct lmb lmb;
+ u64 *l1, *l2;
+ int bypass, shift;
+
+ dma_addr_t dvabase;
+ dma_addr_t dvaend;
+
+ int nsid;
+ int nttbr;
+ int sid_enable_base;
+ int tcr_base;
+ u32 tcr_translate_enable;
+ u32 tcr_bypass;
+ int ttbr_base;
+ u32 ttbr_valid;
+ void (*flush_tlb)(struct apple_dart_priv *priv);
+};
+
+static void apple_dart_t8020_flush_tlb(struct apple_dart_priv *priv)
+{
+ dsb();
+
+ writel(DART_ALL_STREAMS(priv), priv->base + DART_T8020_TLB_SIDMASK);
+ writel(DART_T8020_TLB_CMD_FLUSH, priv->base + DART_T8020_TLB_CMD);
+ while (readl(priv->base + DART_T8020_TLB_CMD) &
+ DART_T8020_TLB_CMD_BUSY)
+ continue;
+}
+
+static void apple_dart_t8110_flush_tlb(struct apple_dart_priv *priv)
+{
+	dsb();
+
+	/*
+	 * FLUSH_ALL is the command value; it must be written to the
+	 * TLB_CMD register (0x0080), not to offset FLUSH_ALL itself.
+	 */
+	writel(DART_T8110_TLB_CMD_FLUSH_ALL,
+	       priv->base + DART_T8110_TLB_CMD);
+	while (readl(priv->base + DART_T8110_TLB_CMD) &
+	       DART_T8110_TLB_CMD_BUSY)
+		continue;
+}
+
+static dma_addr_t apple_dart_map(struct udevice *dev, void *addr, size_t size)
+{
+ struct apple_dart_priv *priv = dev_get_priv(dev);
+ phys_addr_t paddr, dva;
+ phys_size_t psize, off;
+ int i, idx;
+
+ if (priv->bypass)
+ return (phys_addr_t)addr;
+
+ paddr = ALIGN_DOWN((phys_addr_t)addr, DART_PAGE_SIZE);
+ off = (phys_addr_t)addr - paddr;
+ psize = ALIGN(size + off, DART_PAGE_SIZE);
+
+ dva = lmb_alloc(&priv->lmb, psize, DART_PAGE_SIZE);
+
+ idx = dva / DART_PAGE_SIZE;
+ for (i = 0; i < psize / DART_PAGE_SIZE; i++) {
+ priv->l2[idx + i] = (paddr >> priv->shift) | DART_L2_VALID |
+ DART_L2_START(0LL) | DART_L2_END(~0LL);
+ paddr += DART_PAGE_SIZE;
+ }
+ flush_dcache_range((unsigned long)&priv->l2[idx],
+ (unsigned long)&priv->l2[idx + i]);
+ priv->flush_tlb(priv);
+
+ return dva + off;
+}
+
+static void apple_dart_unmap(struct udevice *dev, dma_addr_t addr, size_t size)
+{
+ struct apple_dart_priv *priv = dev_get_priv(dev);
+ phys_addr_t dva;
+ phys_size_t psize;
+ int i, idx;
+
+ if (priv->bypass)
+ return;
+
+ dva = ALIGN_DOWN(addr, DART_PAGE_SIZE);
+ psize = size + (addr - dva);
+ psize = ALIGN(psize, DART_PAGE_SIZE);
+
+ idx = dva / DART_PAGE_SIZE;
+ for (i = 0; i < psize / DART_PAGE_SIZE; i++)
+ priv->l2[idx + i] = DART_L2_INVAL;
+ flush_dcache_range((unsigned long)&priv->l2[idx],
+ (unsigned long)&priv->l2[idx + i]);
+ priv->flush_tlb(priv);
+
+ lmb_free(&priv->lmb, dva, psize);
+}
+
+static struct iommu_ops apple_dart_ops = {
+ .map = apple_dart_map,
+ .unmap = apple_dart_unmap,
+};
static int apple_dart_probe(struct udevice *dev)
{
- void *base;
+ struct apple_dart_priv *priv = dev_get_priv(dev);
+ dma_addr_t addr;
+ phys_addr_t l2;
+ int ntte, nl1, nl2;
int sid, i;
+ u32 params2, params4;
- base = dev_read_addr_ptr(dev);
- if (!base)
+ priv->base = dev_read_addr_ptr(dev);
+ if (!priv->base)
return -EINVAL;
- u32 params2 = readl(base + DART_PARAMS2);
- if (!(params2 & DART_PARAMS2_BYPASS_SUPPORT))
- return 0;
+ if (device_is_compatible(dev, "apple,t8110-dart")) {
+ params4 = readl(priv->base + DART_T8110_PARAMS4);
+ priv->nsid = params4 & DART_T8110_PARAMS4_NSID_MASK;
+ priv->nttbr = 1;
+ priv->sid_enable_base = DART_T8110_SID_ENABLE_BASE;
+ priv->tcr_base = DART_T8110_TCR_BASE;
+ priv->tcr_translate_enable = DART_T8110_TCR_TRANSLATE_ENABLE;
+ priv->tcr_bypass =
+ DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART;
+ priv->ttbr_base = DART_T8110_TTBR_BASE;
+ priv->ttbr_valid = DART_T8110_TTBR_VALID;
+ priv->flush_tlb = apple_dart_t8110_flush_tlb;
+ } else {
+ priv->nsid = 16;
+ priv->nttbr = 4;
+ priv->sid_enable_base = DART_T8020_SID_ENABLE;
+ priv->tcr_base = DART_T8020_TCR_BASE;
+ priv->tcr_translate_enable = DART_T8020_TCR_TRANSLATE_ENABLE;
+ priv->tcr_bypass =
+ DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART;
+ priv->ttbr_base = DART_T8020_TTBR_BASE;
+ priv->ttbr_valid = DART_T8020_TTBR_VALID;
+ priv->flush_tlb = apple_dart_t8020_flush_tlb;
+ }
+
+ if (device_is_compatible(dev, "apple,t6000-dart") ||
+ device_is_compatible(dev, "apple,t8110-dart"))
+ priv->shift = 4;
- if (device_is_compatible(dev, "apple,t8112-dart")) {
- for (sid = 0; sid < 256; sid++) {
- writel(DART_T8110_TCR_BYPASS_DART | DART_T8110_TCR_BYPASS_DAPF,
- base + DART_T8110_TCR(sid));
- writel(0, base + DART_T8110_TTBR(sid));
+ priv->dvabase = DART_PAGE_SIZE;
+ priv->dvaend = SZ_4G - DART_PAGE_SIZE;
+
+ lmb_init(&priv->lmb);
+ lmb_add(&priv->lmb, priv->dvabase, priv->dvaend - priv->dvabase);
+
+ /* Disable translations. */
+ for (sid = 0; sid < priv->nsid; sid++)
+ writel(0, priv->base + DART_TCR(priv, sid));
+
+ /* Remove page tables. */
+ for (sid = 0; sid < priv->nsid; sid++) {
+ for (i = 0; i < priv->nttbr; i++)
+ writel(0, priv->base + DART_TTBR(priv, sid, i));
+ }
+ priv->flush_tlb(priv);
+
+ params2 = readl(priv->base + DART_PARAMS2);
+ if (params2 & DART_PARAMS2_BYPASS_SUPPORT) {
+ for (sid = 0; sid < priv->nsid; sid++) {
+ writel(priv->tcr_bypass,
+ priv->base + DART_TCR(priv, sid));
}
- } else {
- for (sid = 0; sid < 16; sid++) {
- writel(DART_TCR_BYPASS_DART | DART_TCR_BYPASS_DAPF,
- base + DART_TCR(sid));
- for (i = 0; i < 4; i++)
- writel(0, base + DART_TTBR(sid, i));
+ priv->bypass = 1;
+ return 0;
+ }
+
+ ntte = DIV_ROUND_UP(priv->dvaend, DART_PAGE_SIZE);
+ nl2 = DIV_ROUND_UP(ntte, DART_PAGE_SIZE / sizeof(u64));
+ nl1 = DIV_ROUND_UP(nl2, DART_PAGE_SIZE / sizeof(u64));
+
+ priv->l2 = memalign(DART_PAGE_SIZE, nl2 * DART_PAGE_SIZE);
+ memset(priv->l2, 0, nl2 * DART_PAGE_SIZE);
+ flush_dcache_range((unsigned long)priv->l2,
+ (unsigned long)priv->l2 + nl2 * DART_PAGE_SIZE);
+
+ priv->l1 = memalign(DART_PAGE_SIZE, nl1 * DART_PAGE_SIZE);
+ memset(priv->l1, 0, nl1 * DART_PAGE_SIZE);
+ l2 = (phys_addr_t)priv->l2;
+ for (i = 0; i < nl2; i++) {
+ priv->l1[i] = (l2 >> priv->shift) | DART_L1_TABLE;
+ l2 += DART_PAGE_SIZE;
+ }
+ flush_dcache_range((unsigned long)priv->l1,
+ (unsigned long)priv->l1 + nl1 * DART_PAGE_SIZE);
+
+ /* Install page tables. */
+ for (sid = 0; sid < priv->nsid; sid++) {
+ addr = (phys_addr_t)priv->l1;
+ for (i = 0; i < nl1; i++) {
+ writel(addr >> DART_TTBR_SHIFT | priv->ttbr_valid,
+ priv->base + DART_TTBR(priv, sid, i));
+ addr += DART_PAGE_SIZE;
}
}
+ priv->flush_tlb(priv);
+
+ /* Enable all streams. */
+ for (i = 0; i < priv->nsid / 32; i++)
+ writel(~0, priv->base + DART_SID_ENABLE(priv, i));
+
+ /* Enable translations. */
+ for (sid = 0; sid < priv->nsid; sid++) {
+ writel(priv->tcr_translate_enable,
+ priv->base + DART_TCR(priv, sid));
+ }
+
+ return 0;
+}
+
+static int apple_dart_remove(struct udevice *dev)
+{
+ struct apple_dart_priv *priv = dev_get_priv(dev);
+ int sid, i;
+
+ /* Disable translations. */
+ for (sid = 0; sid < priv->nsid; sid++)
+ writel(0, priv->base + DART_TCR(priv, sid));
+
+ /* Remove page tables. */
+ for (sid = 0; sid < priv->nsid; sid++) {
+ for (i = 0; i < priv->nttbr; i++)
+ writel(0, priv->base + DART_TTBR(priv, sid, i));
+ }
+ priv->flush_tlb(priv);
return 0;
}
@@ -64,7 +303,7 @@ static int apple_dart_probe(struct udevice *dev)
static const struct udevice_id apple_dart_ids[] = {
{ .compatible = "apple,t8103-dart" },
{ .compatible = "apple,t6000-dart" },
- { .compatible = "apple,t8112-dart" },
+ { .compatible = "apple,t8110-dart" },
{ /* sentinel */ }
};
@@ -72,5 +311,9 @@ U_BOOT_DRIVER(apple_dart) = {
.name = "apple_dart",
.id = UCLASS_IOMMU,
.of_match = apple_dart_ids,
- .probe = apple_dart_probe
+ .priv_auto = sizeof(struct apple_dart_priv),
+ .ops = &apple_dart_ops,
+ .probe = apple_dart_probe,
+ .remove = apple_dart_remove,
+ .flags = DM_FLAG_OS_PREPARE
};
--
2.39.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/7] usb: xhci: Implement DMA mapping
2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
2023-01-17 22:03 ` [PATCH 2/7] iommu: apple: Implement DMA mapping operations for Apple DART Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
2023-01-17 22:51 ` Marek Vasut
2023-01-17 22:04 ` [PATCH 4/7] iommu: Implement mapping IOMMUs for PCI devices Mark Kettenis
` (3 subsequent siblings)
6 siblings, 1 reply; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis
An XHCI controller that sits behind an IOMMU needs to map and unmap
its memory buffers to do DMA. Implement this by introducing new
xhci_dma_map() and xhci_dma_unmap() helper functions. The
xhci_dma_map() function replaces the existing xhci_virt_to_bus()
function in the sense that it returns the bus address in the case
of simple address translation in the absence of an IOMMU. The
xhci_bus_to_virt() function is eliminated by storing the CPU
address of the allocated scratchpad memory in struct xhci_ctrl.
Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
drivers/usb/host/xhci-mem.c | 84 +++++++++++++++++++++++-------------
drivers/usb/host/xhci-ring.c | 76 ++++++++++++++++++++------------
drivers/usb/host/xhci.c | 10 +++--
include/usb/xhci.h | 27 +++++++++---
4 files changed, 130 insertions(+), 67 deletions(-)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 21cd03b662..72b7530626 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -64,8 +64,9 @@ void xhci_inval_cache(uintptr_t addr, u32 len)
* @param ptr pointer to "segement" to be freed
* Return: none
*/
-static void xhci_segment_free(struct xhci_segment *seg)
+static void xhci_segment_free(struct xhci_ctrl *ctrl, struct xhci_segment *seg)
{
+ xhci_dma_unmap(ctrl, seg->dma, SEGMENT_SIZE);
free(seg->trbs);
seg->trbs = NULL;
@@ -78,7 +79,7 @@ static void xhci_segment_free(struct xhci_segment *seg)
* @param ptr pointer to "ring" to be freed
* Return: none
*/
-static void xhci_ring_free(struct xhci_ring *ring)
+static void xhci_ring_free(struct xhci_ctrl *ctrl, struct xhci_ring *ring)
{
struct xhci_segment *seg;
struct xhci_segment *first_seg;
@@ -89,10 +90,10 @@ static void xhci_ring_free(struct xhci_ring *ring)
seg = first_seg->next;
while (seg != first_seg) {
struct xhci_segment *next = seg->next;
- xhci_segment_free(seg);
+ xhci_segment_free(ctrl, seg);
seg = next;
}
- xhci_segment_free(first_seg);
+ xhci_segment_free(ctrl, first_seg);
free(ring);
}
@@ -105,12 +106,20 @@ static void xhci_ring_free(struct xhci_ring *ring)
*/
static void xhci_scratchpad_free(struct xhci_ctrl *ctrl)
{
+ struct xhci_hccr *hccr = ctrl->hccr;
+ int num_sp;
+
if (!ctrl->scratchpad)
return;
+ num_sp = HCS_MAX_SCRATCHPAD(xhci_readl(&hccr->cr_hcsparams2));
+ xhci_dma_unmap(ctrl, ctrl->scratchpad->sp_array[0],
+ num_sp * ctrl->page_size);
+ xhci_dma_unmap(ctrl, ctrl->dcbaa->dev_context_ptrs[0],
+ num_sp * sizeof(u64));
ctrl->dcbaa->dev_context_ptrs[0] = 0;
- free(xhci_bus_to_virt(ctrl, le64_to_cpu(ctrl->scratchpad->sp_array[0])));
+ free(ctrl->scratchpad->scratchpad);
free(ctrl->scratchpad->sp_array);
free(ctrl->scratchpad);
ctrl->scratchpad = NULL;
@@ -122,8 +131,10 @@ static void xhci_scratchpad_free(struct xhci_ctrl *ctrl)
* @param ptr pointer to "xhci_container_ctx" to be freed
* Return: none
*/
-static void xhci_free_container_ctx(struct xhci_container_ctx *ctx)
+static void xhci_free_container_ctx(struct xhci_ctrl *ctrl,
+ struct xhci_container_ctx *ctx)
{
+ xhci_dma_unmap(ctrl, ctx->dma, ctx->size);
free(ctx->bytes);
free(ctx);
}
@@ -153,12 +164,12 @@ static void xhci_free_virt_devices(struct xhci_ctrl *ctrl)
for (i = 0; i < 31; ++i)
if (virt_dev->eps[i].ring)
- xhci_ring_free(virt_dev->eps[i].ring);
+ xhci_ring_free(ctrl, virt_dev->eps[i].ring);
if (virt_dev->in_ctx)
- xhci_free_container_ctx(virt_dev->in_ctx);
+ xhci_free_container_ctx(ctrl, virt_dev->in_ctx);
if (virt_dev->out_ctx)
- xhci_free_container_ctx(virt_dev->out_ctx);
+ xhci_free_container_ctx(ctrl, virt_dev->out_ctx);
free(virt_dev);
/* make sure we are pointing to NULL */
@@ -174,11 +185,15 @@ static void xhci_free_virt_devices(struct xhci_ctrl *ctrl)
*/
void xhci_cleanup(struct xhci_ctrl *ctrl)
{
- xhci_ring_free(ctrl->event_ring);
- xhci_ring_free(ctrl->cmd_ring);
+ xhci_ring_free(ctrl, ctrl->event_ring);
+ xhci_ring_free(ctrl, ctrl->cmd_ring);
xhci_scratchpad_free(ctrl);
xhci_free_virt_devices(ctrl);
+ xhci_dma_unmap(ctrl, ctrl->erst.erst_dma_addr,
+ sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS);
free(ctrl->erst.entries);
+ xhci_dma_unmap(ctrl, ctrl->dcbaa->dma,
+ sizeof(struct xhci_device_context_array));
free(ctrl->dcbaa);
memset(ctrl, '\0', sizeof(struct xhci_ctrl));
}
@@ -218,15 +233,13 @@ static void xhci_link_segments(struct xhci_ctrl *ctrl, struct xhci_segment *prev
struct xhci_segment *next, bool link_trbs)
{
u32 val;
- u64 val_64 = 0;
if (!prev || !next)
return;
prev->next = next;
if (link_trbs) {
- val_64 = xhci_virt_to_bus(ctrl, next->trbs);
prev->trbs[TRBS_PER_SEGMENT-1].link.segment_ptr =
- cpu_to_le64(val_64);
+ cpu_to_le64(next->dma);
/*
* Set the last TRB in the segment to
@@ -273,7 +286,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring)
* @param none
* Return: pointer to the newly allocated SEGMENT
*/
-static struct xhci_segment *xhci_segment_alloc(void)
+static struct xhci_segment *xhci_segment_alloc(struct xhci_ctrl *ctrl)
{
struct xhci_segment *seg;
@@ -281,6 +294,7 @@ static struct xhci_segment *xhci_segment_alloc(void)
BUG_ON(!seg);
seg->trbs = xhci_malloc(SEGMENT_SIZE);
+ seg->dma = xhci_dma_map(ctrl, seg->trbs, SEGMENT_SIZE);
seg->next = NULL;
@@ -314,7 +328,7 @@ struct xhci_ring *xhci_ring_alloc(struct xhci_ctrl *ctrl, unsigned int num_segs,
if (num_segs == 0)
return ring;
- ring->first_seg = xhci_segment_alloc();
+ ring->first_seg = xhci_segment_alloc(ctrl);
BUG_ON(!ring->first_seg);
num_segs--;
@@ -323,7 +337,7 @@ struct xhci_ring *xhci_ring_alloc(struct xhci_ctrl *ctrl, unsigned int num_segs,
while (num_segs > 0) {
struct xhci_segment *next;
- next = xhci_segment_alloc();
+ next = xhci_segment_alloc(ctrl);
BUG_ON(!next);
xhci_link_segments(ctrl, prev, next, link_trbs);
@@ -372,7 +386,8 @@ static int xhci_scratchpad_alloc(struct xhci_ctrl *ctrl)
if (!scratchpad->sp_array)
goto fail_sp2;
- val_64 = xhci_virt_to_bus(ctrl, scratchpad->sp_array);
+ val_64 = xhci_dma_map(ctrl, scratchpad->sp_array,
+ num_sp * sizeof(u64));
ctrl->dcbaa->dev_context_ptrs[0] = cpu_to_le64(val_64);
xhci_flush_cache((uintptr_t)&ctrl->dcbaa->dev_context_ptrs[0],
@@ -386,16 +401,18 @@ static int xhci_scratchpad_alloc(struct xhci_ctrl *ctrl)
}
BUG_ON(i == 16);
- page_size = 1 << (i + 12);
- buf = memalign(page_size, num_sp * page_size);
+ ctrl->page_size = 1 << (i + 12);
+ buf = memalign(ctrl->page_size, num_sp * ctrl->page_size);
if (!buf)
goto fail_sp3;
- memset(buf, '\0', num_sp * page_size);
- xhci_flush_cache((uintptr_t)buf, num_sp * page_size);
+ memset(buf, '\0', num_sp * ctrl->page_size);
+ xhci_flush_cache((uintptr_t)buf, num_sp * ctrl->page_size);
+ scratchpad->scratchpad = buf;
+ val_64 = xhci_dma_map(ctrl, buf, num_sp * ctrl->page_size);
for (i = 0; i < num_sp; i++) {
- val_64 = xhci_virt_to_bus(ctrl, buf + i * page_size);
scratchpad->sp_array[i] = cpu_to_le64(val_64);
+ val_64 += ctrl->page_size;
}
xhci_flush_cache((uintptr_t)scratchpad->sp_array,
@@ -437,6 +454,7 @@ static struct xhci_container_ctx
ctx->size += CTX_SIZE(xhci_readl(&ctrl->hccr->cr_hccparams));
ctx->bytes = xhci_malloc(ctx->size);
+ ctx->dma = xhci_dma_map(ctrl, ctx->bytes, ctx->size);
return ctx;
}
@@ -487,7 +505,7 @@ int xhci_alloc_virt_device(struct xhci_ctrl *ctrl, unsigned int slot_id)
/* Allocate endpoint 0 ring */
virt_dev->eps[0].ring = xhci_ring_alloc(ctrl, 1, true);
- byte_64 = xhci_virt_to_bus(ctrl, virt_dev->out_ctx->bytes);
+ byte_64 = virt_dev->out_ctx->dma;
/* Point to output device context in dcbaa. */
ctrl->dcbaa->dev_context_ptrs[slot_id] = cpu_to_le64(byte_64);
@@ -523,15 +541,16 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
return -ENOMEM;
}
- val_64 = xhci_virt_to_bus(ctrl, ctrl->dcbaa);
+ ctrl->dcbaa->dma = xhci_dma_map(ctrl, ctrl->dcbaa,
+ sizeof(struct xhci_device_context_array));
/* Set the pointer in DCBAA register */
- xhci_writeq(&hcor->or_dcbaap, val_64);
+ xhci_writeq(&hcor->or_dcbaap, ctrl->dcbaa->dma);
/* Command ring control pointer register initialization */
ctrl->cmd_ring = xhci_ring_alloc(ctrl, 1, true);
/* Set the address in the Command Ring Control register */
- trb_64 = xhci_virt_to_bus(ctrl, ctrl->cmd_ring->first_seg->trbs);
+ trb_64 = ctrl->cmd_ring->first_seg->dma;
val_64 = xhci_readq(&hcor->or_crcr);
val_64 = (val_64 & (u64) CMD_RING_RSVD_BITS) |
(trb_64 & (u64) ~CMD_RING_RSVD_BITS) |
@@ -555,6 +574,8 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
ctrl->event_ring = xhci_ring_alloc(ctrl, ERST_NUM_SEGS, false);
ctrl->erst.entries = xhci_malloc(sizeof(struct xhci_erst_entry) *
ERST_NUM_SEGS);
+ ctrl->erst.erst_dma_addr = xhci_dma_map(ctrl, ctrl->erst.entries,
+ sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS);
ctrl->erst.num_entries = ERST_NUM_SEGS;
@@ -562,7 +583,7 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
val < ERST_NUM_SEGS;
val++) {
struct xhci_erst_entry *entry = &ctrl->erst.entries[val];
- trb_64 = xhci_virt_to_bus(ctrl, seg->trbs);
+ trb_64 = seg->dma;
entry->seg_addr = cpu_to_le64(trb_64);
entry->seg_size = cpu_to_le32(TRBS_PER_SEGMENT);
entry->rsvd = 0;
@@ -571,7 +592,8 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
xhci_flush_cache((uintptr_t)ctrl->erst.entries,
ERST_NUM_SEGS * sizeof(struct xhci_erst_entry));
- deq = xhci_virt_to_bus(ctrl, ctrl->event_ring->dequeue);
+ deq = xhci_trb_virt_to_dma(ctrl->event_ring->deq_seg,
+ ctrl->event_ring->dequeue);
/* Update HC event ring dequeue pointer */
xhci_writeq(&ctrl->ir_set->erst_dequeue,
@@ -586,7 +608,7 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
/* this is the event ring segment table pointer */
val_64 = xhci_readq(&ctrl->ir_set->erst_base);
val_64 &= ERST_PTR_MASK;
- val_64 |= xhci_virt_to_bus(ctrl, ctrl->erst.entries) & ~ERST_PTR_MASK;
+ val_64 |= ctrl->erst.erst_dma_addr & ~ERST_PTR_MASK;
xhci_writeq(&ctrl->ir_set->erst_base, val_64);
@@ -849,7 +871,7 @@ void xhci_setup_addressable_virt_dev(struct xhci_ctrl *ctrl,
/* EP 0 can handle "burst" sizes of 1, so Max Burst Size field is 0 */
ep0_ctx->ep_info2 |= cpu_to_le32(MAX_BURST(0) | ERROR_COUNT(3));
- trb_64 = xhci_virt_to_bus(ctrl, virt_dev->eps[0].ring->first_seg->trbs);
+ trb_64 = virt_dev->eps[0].ring->first_seg->dma;
ep0_ctx->deq = cpu_to_le64(trb_64 | virt_dev->eps[0].ring->cycle_state);
/*
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index eb6dfcdb09..c8260cbdf9 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -24,6 +24,24 @@
#include <usb/xhci.h>
+/*
+ * Returns zero if the TRB isn't in this segment, otherwise it returns the DMA
+ * address of the TRB.
+ */
+dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg,
+ union xhci_trb *trb)
+{
+ unsigned long segment_offset;
+
+ if (!seg || !trb || trb < seg->trbs)
+ return 0;
+ /* offset in TRBs */
+ segment_offset = trb - seg->trbs;
+ if (segment_offset >= TRBS_PER_SEGMENT)
+ return 0;
+ return seg->dma + (segment_offset * sizeof(*trb));
+}
+
/**
* Is this TRB a link TRB or was the last TRB the last TRB in this event ring
* segment? I.e. would the updated event TRB pointer step off the end of the
@@ -180,10 +198,8 @@ static void inc_deq(struct xhci_ctrl *ctrl, struct xhci_ring *ring)
* @param trb_fields pointer to trb field array containing TRB contents
* Return: pointer to the enqueued trb
*/
-static struct xhci_generic_trb *queue_trb(struct xhci_ctrl *ctrl,
- struct xhci_ring *ring,
- bool more_trbs_coming,
- unsigned int *trb_fields)
+static dma_addr_t queue_trb(struct xhci_ctrl *ctrl, struct xhci_ring *ring,
+ bool more_trbs_coming, unsigned int *trb_fields)
{
struct xhci_generic_trb *trb;
int i;
@@ -197,7 +213,7 @@ static struct xhci_generic_trb *queue_trb(struct xhci_ctrl *ctrl,
inc_enq(ctrl, ring, more_trbs_coming);
- return trb;
+ return xhci_trb_virt_to_dma(ring->enq_seg, (union xhci_trb *)trb);
}
/**
@@ -271,19 +287,15 @@ static int prepare_ring(struct xhci_ctrl *ctrl, struct xhci_ring *ep_ring,
* @param cmd Command type to enqueue
* Return: none
*/
-void xhci_queue_command(struct xhci_ctrl *ctrl, u8 *ptr, u32 slot_id,
+void xhci_queue_command(struct xhci_ctrl *ctrl, dma_addr_t addr, u32 slot_id,
u32 ep_index, trb_type cmd)
{
u32 fields[4];
- u64 val_64 = 0;
BUG_ON(prepare_ring(ctrl, ctrl->cmd_ring, EP_STATE_RUNNING));
- if (ptr)
- val_64 = xhci_virt_to_bus(ctrl, ptr);
-
- fields[0] = lower_32_bits(val_64);
- fields[1] = upper_32_bits(val_64);
+ fields[0] = lower_32_bits(addr);
+ fields[1] = upper_32_bits(addr);
fields[2] = 0;
fields[3] = TRB_TYPE(cmd) | SLOT_ID_FOR_TRB(slot_id) |
ctrl->cmd_ring->cycle_state;
@@ -399,12 +411,15 @@ static void giveback_first_trb(struct usb_device *udev, int ep_index,
*/
void xhci_acknowledge_event(struct xhci_ctrl *ctrl)
{
+ dma_addr_t deq;
+
/* Advance our dequeue pointer to the next event */
inc_deq(ctrl, ctrl->event_ring);
/* Inform the hardware */
- xhci_writeq(&ctrl->ir_set->erst_dequeue,
- xhci_virt_to_bus(ctrl, ctrl->event_ring->dequeue) | ERST_EHB);
+ deq = xhci_trb_virt_to_dma(ctrl->event_ring->deq_seg,
+ ctrl->event_ring->dequeue);
+ xhci_writeq(&ctrl->ir_set->erst_dequeue, deq | ERST_EHB);
}
/**
@@ -490,17 +505,19 @@ static void reset_ep(struct usb_device *udev, int ep_index)
struct xhci_ctrl *ctrl = xhci_get_ctrl(udev);
struct xhci_ring *ring = ctrl->devs[udev->slot_id]->eps[ep_index].ring;
union xhci_trb *event;
+ u64 addr;
u32 field;
printf("Resetting EP %d...\n", ep_index);
- xhci_queue_command(ctrl, NULL, udev->slot_id, ep_index, TRB_RESET_EP);
+ xhci_queue_command(ctrl, 0, udev->slot_id, ep_index, TRB_RESET_EP);
event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
field = le32_to_cpu(event->trans_event.flags);
BUG_ON(TRB_TO_SLOT_ID(field) != udev->slot_id);
xhci_acknowledge_event(ctrl);
- xhci_queue_command(ctrl, (void *)((uintptr_t)ring->enqueue |
- ring->cycle_state), udev->slot_id, ep_index, TRB_SET_DEQ);
+ addr = xhci_trb_virt_to_dma(ring->enq_seg,
+ (void *)((uintptr_t)ring->enqueue | ring->cycle_state));
+ xhci_queue_command(ctrl, addr, udev->slot_id, ep_index, TRB_SET_DEQ);
event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags))
!= udev->slot_id || GET_COMP_CODE(le32_to_cpu(
@@ -521,9 +538,10 @@ static void abort_td(struct usb_device *udev, int ep_index)
struct xhci_ctrl *ctrl = xhci_get_ctrl(udev);
struct xhci_ring *ring = ctrl->devs[udev->slot_id]->eps[ep_index].ring;
union xhci_trb *event;
+ u64 addr;
u32 field;
- xhci_queue_command(ctrl, NULL, udev->slot_id, ep_index, TRB_STOP_RING);
+ xhci_queue_command(ctrl, 0, udev->slot_id, ep_index, TRB_STOP_RING);
event = xhci_wait_for_event(ctrl, TRB_TRANSFER);
field = le32_to_cpu(event->trans_event.flags);
@@ -539,8 +557,9 @@ static void abort_td(struct usb_device *udev, int ep_index)
event->event_cmd.status)) != COMP_SUCCESS);
xhci_acknowledge_event(ctrl);
- xhci_queue_command(ctrl, (void *)((uintptr_t)ring->enqueue |
- ring->cycle_state), udev->slot_id, ep_index, TRB_SET_DEQ);
+ addr = xhci_trb_virt_to_dma(ring->enq_seg,
+ (void *)((uintptr_t)ring->enqueue | ring->cycle_state));
+ xhci_queue_command(ctrl, addr, udev->slot_id, ep_index, TRB_SET_DEQ);
event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags))
!= udev->slot_id || GET_COMP_CODE(le32_to_cpu(
@@ -609,8 +628,8 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
u64 addr;
int ret;
u32 trb_fields[4];
- u64 val_64 = xhci_virt_to_bus(ctrl, buffer);
- void *last_transfer_trb_addr;
+ u64 buf_64 = xhci_dma_map(ctrl, buffer, length);
+ dma_addr_t last_transfer_trb_addr;
int available_length;
debug("dev=%p, pipe=%lx, buffer=%p, length=%d\n",
@@ -633,7 +652,7 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
* we send request in more than 1 TRB by chaining them.
*/
running_total = TRB_MAX_BUFF_SIZE -
- (lower_32_bits(val_64) & (TRB_MAX_BUFF_SIZE - 1));
+ (lower_32_bits(buf_64) & (TRB_MAX_BUFF_SIZE - 1));
trb_buff_len = running_total;
running_total &= TRB_MAX_BUFF_SIZE - 1;
@@ -678,7 +697,7 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
* that the buffer should not span 64KB boundary. if so
* we send request in more than 1 TRB by chaining them.
*/
- addr = val_64;
+ addr = buf_64;
if (trb_buff_len > length)
trb_buff_len = length;
@@ -754,7 +773,7 @@ again:
}
if ((uintptr_t)(le64_to_cpu(event->trans_event.buffer)) !=
- (uintptr_t)xhci_virt_to_bus(ctrl, last_transfer_trb_addr)) {
+ (uintptr_t)last_transfer_trb_addr) {
available_length -=
(int)EVENT_TRB_LEN(le32_to_cpu(event->trans_event.transfer_len));
xhci_acknowledge_event(ctrl);
@@ -768,6 +787,7 @@ again:
record_transfer_result(udev, event, available_length);
xhci_acknowledge_event(ctrl);
xhci_inval_cache((uintptr_t)buffer, length);
+ xhci_dma_unmap(ctrl, buf_64, length);
return (udev->status != USB_ST_NOT_PROC) ? 0 : -1;
}
@@ -911,7 +931,7 @@ int xhci_ctrl_tx(struct usb_device *udev, unsigned long pipe,
if (length > 0) {
if (req->requesttype & USB_DIR_IN)
field |= TRB_DIR_IN;
- buf_64 = xhci_virt_to_bus(ctrl, buffer);
+ buf_64 = xhci_dma_map(ctrl, buffer, length);
trb_fields[0] = lower_32_bits(buf_64);
trb_fields[1] = upper_32_bits(buf_64);
@@ -961,8 +981,10 @@ int xhci_ctrl_tx(struct usb_device *udev, unsigned long pipe,
}
/* Invalidate buffer to make it available to usb-core */
- if (length > 0)
+ if (length > 0) {
xhci_inval_cache((uintptr_t)buffer, length);
+ xhci_dma_unmap(ctrl, buf_64, length);
+ }
if (GET_COMP_CODE(le32_to_cpu(event->trans_event.transfer_len))
== COMP_SHORT_TX) {
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index dbeb88afe3..440b0224b1 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -448,7 +448,7 @@ static int xhci_configure_endpoints(struct usb_device *udev, bool ctx_change)
in_ctx = virt_dev->in_ctx;
xhci_flush_cache((uintptr_t)in_ctx->bytes, in_ctx->size);
- xhci_queue_command(ctrl, in_ctx->bytes, udev->slot_id, 0,
+ xhci_queue_command(ctrl, in_ctx->dma, udev->slot_id, 0,
ctx_change ? TRB_EVAL_CONTEXT : TRB_CONFIG_EP);
event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags))
@@ -585,7 +585,8 @@ static int xhci_set_configuration(struct usb_device *udev)
cpu_to_le32(MAX_BURST(max_burst) |
ERROR_COUNT(err_count));
- trb_64 = xhci_virt_to_bus(ctrl, virt_dev->eps[ep_index].ring->enqueue);
+ trb_64 = xhci_trb_virt_to_dma(virt_dev->eps[ep_index].ring->enq_seg,
+ virt_dev->eps[ep_index].ring->enqueue);
ep_ctx[ep_index]->deq = cpu_to_le64(trb_64 |
virt_dev->eps[ep_index].ring->cycle_state);
@@ -643,7 +644,8 @@ static int xhci_address_device(struct usb_device *udev, int root_portnr)
ctrl_ctx->add_flags = cpu_to_le32(SLOT_FLAG | EP0_FLAG);
ctrl_ctx->drop_flags = 0;
- xhci_queue_command(ctrl, (void *)ctrl_ctx, slot_id, 0, TRB_ADDR_DEV);
+ xhci_queue_command(ctrl, virt_dev->in_ctx->dma,
+ slot_id, 0, TRB_ADDR_DEV);
event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags)) != slot_id);
@@ -718,7 +720,7 @@ static int _xhci_alloc_device(struct usb_device *udev)
return 0;
}
- xhci_queue_command(ctrl, NULL, 0, 0, TRB_ENABLE_SLOT);
+ xhci_queue_command(ctrl, 0, 0, 0, TRB_ENABLE_SLOT);
event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
BUG_ON(GET_COMP_CODE(le32_to_cpu(event->event_cmd.status))
!= COMP_SUCCESS);
diff --git a/include/usb/xhci.h b/include/usb/xhci.h
index ea4cf3f52b..85c359fa1b 100644
--- a/include/usb/xhci.h
+++ b/include/usb/xhci.h
@@ -16,6 +16,7 @@
#ifndef HOST_XHCI_H_
#define HOST_XHCI_H_
+#include <iommu.h>
#include <phys2bus.h>
#include <asm/types.h>
#include <asm/cache.h>
@@ -490,6 +491,7 @@ struct xhci_container_ctx {
int size;
u8 *bytes;
+ dma_addr_t dma;
};
/**
@@ -688,6 +690,8 @@ struct xhci_input_control_ctx {
struct xhci_device_context_array {
/* 64-bit device addresses; we only write 32-bit addresses */
__le64 dev_context_ptrs[MAX_HC_SLOTS];
+ /* private xHCD pointers */
+ dma_addr_t dma;
};
/* TODO: write function to set the 64-bit device DMA address */
/*
@@ -997,6 +1001,7 @@ struct xhci_segment {
union xhci_trb *trbs;
/* private to HCD */
struct xhci_segment *next;
+ dma_addr_t dma;
};
struct xhci_ring {
@@ -1025,11 +1030,14 @@ struct xhci_erst_entry {
struct xhci_erst {
struct xhci_erst_entry *entries;
unsigned int num_entries;
+ /* xhci->event_ring keeps track of segment dma addresses */
+ dma_addr_t erst_dma_addr;
/* Num entries the ERST can contain */
unsigned int erst_size;
};
struct xhci_scratchpad {
+ void *scratchpad;
u64 *sp_array;
};
@@ -1216,6 +1224,7 @@ struct xhci_ctrl {
struct xhci_virt_device *devs[MAX_HC_SLOTS];
int rootdev;
u16 hci_version;
+ int page_size;
u32 quirks;
#define XHCI_MTK_HOST BIT(0)
};
@@ -1226,7 +1235,7 @@ struct xhci_ctrl {
#define xhci_to_dev(_ctrl) NULL
#endif
-unsigned long trb_addr(struct xhci_segment *seg, union xhci_trb *trb);
+dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb);
struct xhci_input_control_ctx
*xhci_get_input_control_ctx(struct xhci_container_ctx *ctx);
struct xhci_slot_ctx *xhci_get_slot_ctx(struct xhci_ctrl *ctrl,
@@ -1243,7 +1252,7 @@ void xhci_slot_copy(struct xhci_ctrl *ctrl,
struct xhci_container_ctx *out_ctx);
void xhci_setup_addressable_virt_dev(struct xhci_ctrl *ctrl,
struct usb_device *udev, int hop_portnr);
-void xhci_queue_command(struct xhci_ctrl *ctrl, u8 *ptr,
+void xhci_queue_command(struct xhci_ctrl *ctrl, dma_addr_t addr,
u32 slot_id, u32 ep_index, trb_type cmd);
void xhci_acknowledge_event(struct xhci_ctrl *ctrl);
union xhci_trb *xhci_wait_for_event(struct xhci_ctrl *ctrl, trb_type expected);
@@ -1284,14 +1293,22 @@ extern struct dm_usb_ops xhci_usb_ops;
struct xhci_ctrl *xhci_get_ctrl(struct usb_device *udev);
-static inline dma_addr_t xhci_virt_to_bus(struct xhci_ctrl *ctrl, void *addr)
+static inline dma_addr_t xhci_dma_map(struct xhci_ctrl *ctrl, void *addr,
+ size_t size)
{
+#if CONFIG_IS_ENABLED(IOMMU)
+ return dev_iommu_dma_map(xhci_to_dev(ctrl), addr, size);
+#else
return dev_phys_to_bus(xhci_to_dev(ctrl), virt_to_phys(addr));
+#endif
}
-static inline void *xhci_bus_to_virt(struct xhci_ctrl *ctrl, dma_addr_t addr)
+static inline void xhci_dma_unmap(struct xhci_ctrl *ctrl, dma_addr_t addr,
+ size_t size)
{
- return phys_to_virt(dev_bus_to_phys(xhci_to_dev(ctrl), addr));
+#if CONFIG_IS_ENABLED(IOMMU)
+ dev_iommu_dma_unmap(xhci_to_dev(ctrl), addr, size);
+#endif
}
#endif /* HOST_XHCI_H_ */
--
2.39.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 5/7] pci: Add Apple PCIe controller driver
2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
` (3 preceding siblings ...)
2023-01-17 22:04 ` [PATCH 4/7] iommu: Implement mapping IOMMUs for PCI devices Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
2023-01-17 22:04 ` [PATCH 6/7] arm: apple: Enable PCIe USB controller Mark Kettenis
2023-01-17 22:04 ` [PATCH 7/7] usb: xhci: Fix root hub descriptor Mark Kettenis
6 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis
This driver supports the PCIe controller on the Apple M1 and
M2 SoCs. The code is adapted from the Linux driver.
Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
MAINTAINERS | 1 +
arch/arm/Kconfig | 2 +
drivers/pci/Kconfig | 9 +
drivers/pci/Makefile | 1 +
drivers/pci/pcie_apple.c | 354 +++++++++++++++++++++++++++++++++++++++
5 files changed, 367 insertions(+)
create mode 100644 drivers/pci/pcie_apple.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 3fc4cd0f12..b8a947f9d3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -122,6 +122,7 @@ F: arch/arm/mach-apple/
F: configs/apple_m1_defconfig
F: drivers/iommu/apple_dart.c
F: drivers/nvme/nvme_apple.c
+F: drivers/pci/pcie_apple.c
F: drivers/pinctrl/pinctrl-apple.c
F: drivers/watchdog/apple_wdt.c
F: include/configs/apple.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index cac4fa09fd..780815269b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -963,6 +963,7 @@ config ARCH_APPLE
bool "Apple SoCs"
select ARM64
select CLK
+ select CMD_PCI
select CMD_USB
select DM
select DM_GPIO
@@ -977,6 +978,7 @@ config ARCH_APPLE
select LINUX_KERNEL_IMAGE_HEADER
select OF_BOARD_SETUP
select OF_CONTROL
+ select PCI
select PINCTRL
select POSITION_INDEPENDENT
select POWER_DOMAIN
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 22f4995453..d61596cd7c 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -105,6 +105,15 @@ config PCIE_ECAM_SYNQUACER
Note that this must be configured when boot because Linux driver
expects the PCIe RC has been configured in the bootloader.
+config PCIE_APPLE
+ bool "Enable Apple PCIe driver"
+ depends on ARCH_APPLE
+ imply PCI_INIT_R
+ default y
+ help
+ Say Y here if you want to enable PCIe controller support on
+ Apple SoCs.
+
config PCI_GT64120
bool "GT64120 PCI support"
depends on MIPS
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index dd1ad91ced..d393f1ba03 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci_auto_common.o pci_common.o
obj-$(CONFIG_PCIE_ECAM_GENERIC) += pcie_ecam_generic.o
obj-$(CONFIG_PCIE_ECAM_SYNQUACER) += pcie_ecam_synquacer.o
+obj-$(CONFIG_PCIE_APPLE) += pcie_apple.o
obj-$(CONFIG_PCI_GT64120) += pci_gt64120.o
obj-$(CONFIG_PCI_MPC85XX) += pci_mpc85xx.o
obj-$(CONFIG_PCI_MSC01) += pci_msc01.o
diff --git a/drivers/pci/pcie_apple.c b/drivers/pci/pcie_apple.c
new file mode 100644
index 0000000000..9b08e1e5da
--- /dev/null
+++ b/drivers/pci/pcie_apple.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host bridge driver for Apple system-on-chips.
+ *
+ * The HW is ECAM compliant.
+ *
+ * Initialization requires enabling power and clocks, along with a
+ * number of register pokes.
+ *
+ * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2021 Google LLC
+ * Copyright (C) 2021 Corellium LLC
+ * Copyright (C) 2021 Mark Kettenis <kettenis@openbsd.org>
+ *
+ * Author: Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <common.h>
+#include <dm.h>
+#include <dm/device_compat.h>
+#include <dm/devres.h>
+#include <mapmem.h>
+#include <pci.h>
+#include <asm/io.h>
+#include <asm-generic/gpio.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+
+#define CORE_RC_PHYIF_CTL 0x00024
+#define CORE_RC_PHYIF_CTL_RUN BIT(0)
+#define CORE_RC_PHYIF_STAT 0x00028
+#define CORE_RC_PHYIF_STAT_REFCLK BIT(4)
+#define CORE_RC_CTL 0x00050
+#define CORE_RC_CTL_RUN BIT(0)
+#define CORE_RC_STAT 0x00058
+#define CORE_RC_STAT_READY BIT(0)
+#define CORE_FABRIC_STAT 0x04000
+#define CORE_FABRIC_STAT_MASK 0x001F001F
+#define CORE_LANE_CFG(port) (0x84000 + 0x4000 * (port))
+#define CORE_LANE_CFG_REFCLK0REQ BIT(0)
+#define CORE_LANE_CFG_REFCLK1REQ BIT(1)
+#define CORE_LANE_CFG_REFCLK0ACK BIT(2)
+#define CORE_LANE_CFG_REFCLK1ACK BIT(3)
+#define CORE_LANE_CFG_REFCLKEN (BIT(9) | BIT(10))
+#define CORE_LANE_CTL(port) (0x84004 + 0x4000 * (port))
+#define CORE_LANE_CTL_CFGACC BIT(15)
+
+#define PORT_LTSSMCTL 0x00080
+#define PORT_LTSSMCTL_START BIT(0)
+#define PORT_INTSTAT 0x00100
+#define PORT_INT_TUNNEL_ERR 31
+#define PORT_INT_CPL_TIMEOUT 23
+#define PORT_INT_RID2SID_MAPERR 22
+#define PORT_INT_CPL_ABORT 21
+#define PORT_INT_MSI_BAD_DATA 19
+#define PORT_INT_MSI_ERR 18
+#define PORT_INT_REQADDR_GT32 17
+#define PORT_INT_AF_TIMEOUT 15
+#define PORT_INT_LINK_DOWN 14
+#define PORT_INT_LINK_UP 12
+#define PORT_INT_LINK_BWMGMT 11
+#define PORT_INT_AER_MASK (15 << 4)
+#define PORT_INT_PORT_ERR 4
+#define PORT_INT_INTx(i) i
+#define PORT_INT_INTx_MASK 15
+#define PORT_INTMSK 0x00104
+#define PORT_INTMSKSET 0x00108
+#define PORT_INTMSKCLR 0x0010c
+#define PORT_MSICFG 0x00124
+#define PORT_MSICFG_EN BIT(0)
+#define PORT_MSICFG_L2MSINUM_SHIFT 4
+#define PORT_MSIBASE 0x00128
+#define PORT_MSIBASE_1_SHIFT 16
+#define PORT_MSIADDR 0x00168
+#define PORT_LINKSTS 0x00208
+#define PORT_LINKSTS_UP BIT(0)
+#define PORT_LINKSTS_BUSY BIT(2)
+#define PORT_LINKCMDSTS 0x00210
+#define PORT_OUTS_NPREQS 0x00284
+#define PORT_OUTS_NPREQS_REQ BIT(24)
+#define PORT_OUTS_NPREQS_CPL BIT(16)
+#define PORT_RXWR_FIFO 0x00288
+#define PORT_RXWR_FIFO_HDR GENMASK(15, 10)
+#define PORT_RXWR_FIFO_DATA GENMASK(9, 0)
+#define PORT_RXRD_FIFO 0x0028C
+#define PORT_RXRD_FIFO_REQ GENMASK(6, 0)
+#define PORT_OUTS_CPLS 0x00290
+#define PORT_OUTS_CPLS_SHRD GENMASK(14, 8)
+#define PORT_OUTS_CPLS_WAIT GENMASK(6, 0)
+#define PORT_APPCLK 0x00800
+#define PORT_APPCLK_EN BIT(0)
+#define PORT_APPCLK_CGDIS BIT(8)
+#define PORT_STATUS 0x00804
+#define PORT_STATUS_READY BIT(0)
+#define PORT_REFCLK 0x00810
+#define PORT_REFCLK_EN BIT(0)
+#define PORT_REFCLK_CGDIS BIT(8)
+#define PORT_PERST 0x00814
+#define PORT_PERST_OFF BIT(0)
+#define PORT_RID2SID(i16) (0x00828 + 4 * (i16))
+#define PORT_RID2SID_VALID BIT(31)
+#define PORT_RID2SID_SID_SHIFT 16
+#define PORT_RID2SID_BUS_SHIFT 8
+#define PORT_RID2SID_DEV_SHIFT 3
+#define PORT_RID2SID_FUNC_SHIFT 0
+#define PORT_OUTS_PREQS_HDR 0x00980
+#define PORT_OUTS_PREQS_HDR_MASK GENMASK(9, 0)
+#define PORT_OUTS_PREQS_DATA 0x00984
+#define PORT_OUTS_PREQS_DATA_MASK GENMASK(15, 0)
+#define PORT_TUNCTRL 0x00988
+#define PORT_TUNCTRL_PERST_ON BIT(0)
+#define PORT_TUNCTRL_PERST_ACK_REQ BIT(1)
+#define PORT_TUNSTAT 0x0098c
+#define PORT_TUNSTAT_PERST_ON BIT(0)
+#define PORT_TUNSTAT_PERST_ACK_PEND BIT(1)
+#define PORT_PREFMEM_ENABLE 0x00994
+
+struct apple_pcie_priv {
+ struct udevice *dev;
+ void __iomem *base;
+ void __iomem *cfg_base;
+ struct list_head ports;
+};
+
+struct apple_pcie_port {
+ struct apple_pcie_priv *pcie;
+ struct gpio_desc reset;
+ ofnode np;
+ void __iomem *base;
+ struct list_head entry;
+ int idx;
+};
+
+static void rmw_set(u32 set, void __iomem *addr)
+{
+ writel_relaxed(readl_relaxed(addr) | set, addr);
+}
+
+static void rmw_clear(u32 clr, void __iomem *addr)
+{
+ writel_relaxed(readl_relaxed(addr) & ~clr, addr);
+}
+
+static int apple_pcie_config_address(const struct udevice *bus,
+ pci_dev_t bdf, uint offset,
+ void **paddress)
+{
+ struct apple_pcie_priv *pcie = dev_get_priv(bus);
+ void *addr;
+
+ addr = pcie->cfg_base;
+ addr += PCIE_ECAM_OFFSET(PCI_BUS(bdf), PCI_DEV(bdf),
+ PCI_FUNC(bdf), offset);
+ *paddress = addr;
+
+ return 0;
+}
+
+static int apple_pcie_read_config(const struct udevice *bus, pci_dev_t bdf,
+ uint offset, ulong *valuep,
+ enum pci_size_t size)
+{
+ int ret;
+
+ ret = pci_generic_mmap_read_config(bus, apple_pcie_config_address,
+ bdf, offset, valuep, size);
+ return ret;
+}
+
+static int apple_pcie_write_config(struct udevice *bus, pci_dev_t bdf,
+ uint offset, ulong value,
+ enum pci_size_t size)
+{
+ return pci_generic_mmap_write_config(bus, apple_pcie_config_address,
+ bdf, offset, value, size);
+}
+
+static const struct dm_pci_ops apple_pcie_ops = {
+ .read_config = apple_pcie_read_config,
+ .write_config = apple_pcie_write_config,
+};
+
+static int apple_pcie_setup_refclk(struct apple_pcie_priv *pcie,
+ struct apple_pcie_port *port)
+{
+ u32 stat;
+ int res;
+
+ res = readl_poll_sleep_timeout(pcie->base + CORE_RC_PHYIF_STAT, stat,
+ stat & CORE_RC_PHYIF_STAT_REFCLK,
+ 100, 50000);
+ if (res < 0)
+ return res;
+
+ rmw_set(CORE_LANE_CTL_CFGACC, pcie->base + CORE_LANE_CTL(port->idx));
+ rmw_set(CORE_LANE_CFG_REFCLK0REQ, pcie->base + CORE_LANE_CFG(port->idx));
+
+ res = readl_poll_sleep_timeout(pcie->base + CORE_LANE_CFG(port->idx),
+ stat, stat & CORE_LANE_CFG_REFCLK0ACK,
+ 100, 50000);
+ if (res < 0)
+ return res;
+
+ rmw_set(CORE_LANE_CFG_REFCLK1REQ, pcie->base + CORE_LANE_CFG(port->idx));
+ res = readl_poll_sleep_timeout(pcie->base + CORE_LANE_CFG(port->idx),
+ stat, stat & CORE_LANE_CFG_REFCLK1ACK,
+ 100, 50000);
+
+ if (res < 0)
+ return res;
+
+ rmw_clear(CORE_LANE_CTL_CFGACC, pcie->base + CORE_LANE_CTL(port->idx));
+
+ rmw_set(CORE_LANE_CFG_REFCLKEN, pcie->base + CORE_LANE_CFG(port->idx));
+ rmw_set(PORT_REFCLK_EN, port->base + PORT_REFCLK);
+
+ return 0;
+}
+
+static int apple_pcie_setup_port(struct apple_pcie_priv *pcie, ofnode np)
+{
+ struct apple_pcie_port *port;
+ struct gpio_desc reset;
+ fdt_addr_t addr;
+ u32 stat, idx;
+ int ret;
+
+ ret = gpio_request_by_name_nodev(np, "reset-gpios", 0, &reset, 0);
+ if (ret)
+ return ret;
+
+ port = devm_kzalloc(pcie->dev, sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return -ENOMEM;
+
+ ret = ofnode_read_u32_index(np, "reg", 0, &idx);
+ if (ret)
+ return ret;
+
+ /* Use the first reg entry to work out the port index */
+ port->idx = idx >> 11;
+ port->pcie = pcie;
+ port->reset = reset;
+ port->np = np;
+
+ addr = dev_read_addr_index(pcie->dev, port->idx + 2);
+ if (addr == FDT_ADDR_T_NONE)
+ return -EINVAL;
+ port->base = map_sysmem(addr, 0);
+
+ rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK);
+
+ /* Assert PERST# before setting up the clock */
+ dm_gpio_set_value(&reset, 1);
+
+ ret = apple_pcie_setup_refclk(pcie, port);
+ if (ret < 0)
+ return ret;
+
+ /* The minimal Tperst-clk value is 100us (PCIe CEM r5.0, 2.9.2) */
+ udelay(100);
+
+ /* Deassert PERST# */
+ rmw_set(PORT_PERST_OFF, port->base + PORT_PERST);
+ dm_gpio_set_value(&reset, 0);
+
+ /* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */
+ udelay(100 * 1000);
+
+ ret = readl_poll_sleep_timeout(port->base + PORT_STATUS, stat,
+ stat & PORT_STATUS_READY, 100, 250000);
+ if (ret < 0) {
+ dev_err(pcie->dev, "port %d ready wait timeout\n", port->idx);
+ return ret;
+ }
+
+ rmw_clear(PORT_REFCLK_CGDIS, port->base + PORT_REFCLK);
+ rmw_clear(PORT_APPCLK_CGDIS, port->base + PORT_APPCLK);
+
+ list_add_tail(&port->entry, &pcie->ports);
+
+ writel_relaxed(PORT_LTSSMCTL_START, port->base + PORT_LTSSMCTL);
+
+ /*
+ * Deliberately ignore the link not coming up as connected
+ * devices (e.g. the WiFi controller) may not be powered up.
+ */
+ readl_poll_sleep_timeout(port->base + PORT_LINKSTS, stat,
+ (stat & PORT_LINKSTS_UP), 100, 100000);
+
+ return 0;
+}
+
+static int apple_pcie_probe(struct udevice *dev)
+{
+ struct apple_pcie_priv *pcie = dev_get_priv(dev);
+ fdt_addr_t addr;
+ ofnode of_port;
+ int i, ret;
+
+ pcie->dev = dev;
+ addr = dev_read_addr_index(dev, 0);
+ if (addr == FDT_ADDR_T_NONE)
+ return -EINVAL;
+ pcie->cfg_base = map_sysmem(addr, 0);
+
+ addr = dev_read_addr_index(dev, 1);
+ if (addr == FDT_ADDR_T_NONE)
+ return -EINVAL;
+ pcie->base = map_sysmem(addr, 0);
+
+ INIT_LIST_HEAD(&pcie->ports);
+
+ for (of_port = ofnode_first_subnode(dev_ofnode(dev));
+ ofnode_valid(of_port);
+ of_port = ofnode_next_subnode(of_port)) {
+ ret = apple_pcie_setup_port(pcie, of_port);
+ if (ret) {
+ dev_err(pcie->dev, "Port %s setup fail: %d\n", ofnode_get_name(of_port), ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int apple_pcie_remove(struct udevice *dev)
+{
+ struct apple_pcie_priv *pcie = dev_get_priv(dev);
+ struct apple_pcie_port *port, *tmp;
+
+ list_for_each_entry_safe(port, tmp, &pcie->ports, entry) {
+ gpio_free_list_nodev(&port->reset, 1);
+ free(port);
+ }
+
+ return 0;
+}
+
+static const struct udevice_id apple_pcie_of_match[] = {
+ { .compatible = "apple,pcie" },
+ { /* sentinel */ }
+};
+
+U_BOOT_DRIVER(apple_pcie) = {
+ .name = "apple_pcie",
+ .id = UCLASS_PCI,
+ .of_match = apple_pcie_of_match,
+ .probe = apple_pcie_probe,
+ .remove = apple_pcie_remove,
+ .priv_auto = sizeof(struct apple_pcie_priv),
+ .ops = &apple_pcie_ops,
+};
--
2.39.0
^ permalink raw reply related [flat|nested] 12+ messages in thread