All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/7] Apple PCIe/XHCI support
@ 2023-01-17 22:03 Mark Kettenis
  2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
                   ` (6 more replies)
  0 siblings, 7 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:03 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

This series adds support for the PCIe controller found on Apple M1 and
M2 machines and enables support for PCIe XHCI controllers.  This makes
the type-A USB ports on the M1 Mac mini work.  Since the use of Apple's
DART IOMMU is mandatory (these PCIe DARTs don't support bypass mode),
this adds DMA mapping operations to the IOMMU uclass and implements
them for the Apple DART.  It modifies the XHCI driver code to map
DMA buffers through the IOMMU if there is one.  Since the M1 Mac mini
now has two types of XHCI controllers with different numbers of ports
(2 for the DWC3 controllers, 8 for the Fresco Logic PCIe controller)
this uncovered an issue with the way the hub descriptor is
implemented in the XHCI driver.


Mark Kettenis (7):
  iommu: Add DMA mapping operations
  iommu: apple: Implement DMA mapping operations for Apple DART
  usb: xhci: Implement DMA mapping
  iommu: Implement mapping IOMMUs for PCI devices
  pci: Add Apple PCIe controller driver
  arm: apple: Enable PCIe USB controller
  usb: xhci: Fix root hub descriptor

 MAINTAINERS                  |   1 +
 arch/arm/Kconfig             |   2 +
 configs/apple_m1_defconfig   |   1 +
 drivers/iommu/apple_dart.c   | 311 ++++++++++++++++++++++++++----
 drivers/iommu/iommu-uclass.c |  93 +++++++++
 drivers/pci/Kconfig          |   9 +
 drivers/pci/Makefile         |   1 +
 drivers/pci/pcie_apple.c     | 354 +++++++++++++++++++++++++++++++++++
 drivers/usb/host/xhci-mem.c  |  84 ++++++---
 drivers/usb/host/xhci-ring.c |  76 +++++---
 drivers/usb/host/xhci.c      |  25 +--
 include/dm/device.h          |   3 +
 include/iommu.h              |  24 +++
 include/usb/xhci.h           |  28 ++-
 14 files changed, 904 insertions(+), 108 deletions(-)
 create mode 100644 drivers/pci/pcie_apple.c

-- 
2.39.0


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/7] iommu: Add DMA mapping operations
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
@ 2023-01-17 22:03 ` Mark Kettenis
  2023-01-18 19:42   ` Simon Glass
  2023-01-17 22:03 ` [PATCH 2/7] iommu: apple: Implement DMA mapping operations for Apple DART Mark Kettenis
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:03 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

In order to support IOMMUs in non-bypass mode we need device ops
to map and unmap DMA memory.  The map operation enters a mapping
for a region specified by CPU address and size into the translation
table of the IOMMU and returns a DMA address suitable for
programming the device to do DMA.  The unmap operation removes
this mapping from the translation table of the IOMMU.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 drivers/iommu/iommu-uclass.c | 28 ++++++++++++++++++++++++++++
 include/dm/device.h          |  3 +++
 include/iommu.h              | 24 ++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/drivers/iommu/iommu-uclass.c b/drivers/iommu/iommu-uclass.c
index ed917b3c3e..f6b1457736 100644
--- a/drivers/iommu/iommu-uclass.c
+++ b/drivers/iommu/iommu-uclass.c
@@ -7,6 +7,9 @@
 
 #include <common.h>
 #include <dm.h>
+#include <iommu.h>
+#include <phys2bus.h>
+#include <asm/io.h>
 
 #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA))
 int dev_iommu_enable(struct udevice *dev)
@@ -33,12 +36,37 @@ int dev_iommu_enable(struct udevice *dev)
 			      __func__, ret);
 			return ret;
 		}
+		dev->iommu = dev_iommu;
 	}
 
 	return 0;
 }
 #endif
 
+dma_addr_t dev_iommu_dma_map(struct udevice *dev, void *addr, size_t size)
+{
+	const struct iommu_ops *ops;
+
+	if (dev->iommu) {
+		ops = device_get_ops(dev->iommu);
+		if (ops && ops->map)
+			return ops->map(dev->iommu, addr, size);
+	}
+
+	return dev_phys_to_bus(dev, virt_to_phys(addr));
+}
+
+void dev_iommu_dma_unmap(struct udevice *dev, dma_addr_t addr, size_t size)
+{
+	const struct iommu_ops *ops;
+
+	if (dev->iommu) {
+		ops = device_get_ops(dev->iommu);
+		if (ops && ops->unmap)
+			ops->unmap(dev->iommu, addr, size);
+	}
+}
+
 UCLASS_DRIVER(iommu) = {
 	.id		= UCLASS_IOMMU,
 	.name		= "iommu",
diff --git a/include/dm/device.h b/include/dm/device.h
index f3f953c9af..abe1927ecd 100644
--- a/include/dm/device.h
+++ b/include/dm/device.h
@@ -194,6 +194,9 @@ struct udevice {
 #if CONFIG_IS_ENABLED(DM_DMA)
 	ulong dma_offset;
 #endif
+#if CONFIG_IS_ENABLED(IOMMU)
+	struct udevice *iommu;
+#endif
 };
 
 static inline int dm_udevice_size(void)
diff --git a/include/iommu.h b/include/iommu.h
index 6c46adf449..cf9719c5e9 100644
--- a/include/iommu.h
+++ b/include/iommu.h
@@ -3,6 +3,27 @@
 
 struct udevice;
 
+struct iommu_ops {
+	/**
+	 * map() - map DMA memory
+	 *
+	 * @dev:	device for which to map DMA memory
+	 * @addr:	CPU address of the memory
+	 * @size:	size of the memory
+	 * @return DMA address for the device
+	 */
+	dma_addr_t (*map)(struct udevice *dev, void *addr, size_t size);
+
+	/**
+	 * unmap() - unmap DMA memory
+	 *
+	 * @dev:	device for which to unmap DMA memory
+	 * @addr:	DMA address of the memory
+	 * @size:	size of the memory
+	 */
+	void (*unmap)(struct udevice *dev, dma_addr_t addr, size_t size);
+};
+
 #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA)) && \
 	CONFIG_IS_ENABLED(IOMMU)
 int dev_iommu_enable(struct udevice *dev);
@@ -13,4 +34,7 @@ static inline int dev_iommu_enable(struct udevice *dev)
 }
 #endif
 
+dma_addr_t dev_iommu_dma_map(struct udevice *dev, void *addr, size_t size);
+void dev_iommu_dma_unmap(struct udevice *dev, dma_addr_t addr, size_t size);
+
 #endif
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/7] iommu: apple: Implement DMA mapping operations for Apple DART
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
  2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
@ 2023-01-17 22:03 ` Mark Kettenis
  2023-01-17 22:04 ` [PATCH 3/7] usb: xhci: Implement DMA mapping Mark Kettenis
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:03 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

Implement translation table support for all the variations of
Apple's DART IOMMU that can be found on Apple's M1 and M2 SoCs.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 drivers/iommu/apple_dart.c | 311 +++++++++++++++++++++++++++++++++----
 1 file changed, 277 insertions(+), 34 deletions(-)

diff --git a/drivers/iommu/apple_dart.c b/drivers/iommu/apple_dart.c
index 2faacb8f3b..6ecd84303b 100644
--- a/drivers/iommu/apple_dart.c
+++ b/drivers/iommu/apple_dart.c
@@ -6,57 +6,296 @@
 #include <common.h>
 #include <cpu_func.h>
 #include <dm.h>
+#include <iommu.h>
+#include <lmb.h>
+#include <memalign.h>
 #include <asm/io.h>
 
 #define DART_PARAMS2		0x0004
 #define  DART_PARAMS2_BYPASS_SUPPORT	BIT(0)
-#define DART_TLB_OP		0x0020
-#define  DART_TLB_OP_OPMASK	(0xfff << 20)
-#define  DART_TLB_OP_FLUSH	(0x001 << 20)
-#define  DART_TLB_OP_BUSY	BIT(2)
-#define DART_TLB_OP_SIDMASK	0x0034
-#define DART_ERROR_STATUS	0x0040
-#define DART_TCR(sid)		(0x0100 + 4 * (sid))
-#define  DART_TCR_TRANSLATE_ENABLE	BIT(7)
-#define  DART_TCR_BYPASS_DART		BIT(8)
-#define  DART_TCR_BYPASS_DAPF		BIT(12)
-#define DART_TTBR(sid, idx)	(0x0200 + 16 * (sid) + 4 * (idx))
-#define  DART_TTBR_VALID	BIT(31)
-#define  DART_TTBR_SHIFT	12
 
-#define DART_T8110_TCR(sid)	(0x1000 + 4 * (sid))
+#define DART_T8020_TLB_CMD		0x0020
+#define  DART_T8020_TLB_CMD_FLUSH		BIT(20)
+#define  DART_T8020_TLB_CMD_BUSY		BIT(2)
+#define DART_T8020_TLB_SIDMASK		0x0034
+#define DART_T8020_ERROR		0x0040
+#define DART_T8020_ERROR_ADDR_LO	0x0050
+#define DART_T8020_ERROR_ADDR_HI	0x0054
+#define DART_T8020_CONFIG		0x0060
+#define  DART_T8020_CONFIG_LOCK			BIT(15)
+#define DART_T8020_SID_ENABLE		0x00fc
+#define DART_T8020_TCR_BASE		0x0100
+#define  DART_T8020_TCR_TRANSLATE_ENABLE	BIT(7)
+#define  DART_T8020_TCR_BYPASS_DART		BIT(8)
+#define  DART_T8020_TCR_BYPASS_DAPF		BIT(12)
+#define DART_T8020_TTBR_BASE		0x0200
+#define  DART_T8020_TTBR_VALID			BIT(31)
+
+#define DART_T8110_PARAMS4		0x000c
+#define  DART_T8110_PARAMS4_NSID_MASK		(0x1ff << 0)
+#define DART_T8110_TLB_CMD		0x0080
+#define  DART_T8110_TLB_CMD_BUSY		BIT(31)
+#define  DART_T8110_TLB_CMD_FLUSH_ALL		BIT(8)
+#define DART_T8110_ERROR		0x0100
+#define DART_T8110_ERROR_MASK		0x0104
+#define DART_T8110_ERROR_ADDR_LO	0x0170
+#define DART_T8110_ERROR_ADDR_HI	0x0174
+#define DART_T8110_PROTECT		0x0200
+#define  DART_T8110_PROTECT_TTBR_TCR		BIT(0)
+#define DART_T8110_SID_ENABLE_BASE	0x0c00
+#define DART_T8110_TCR_BASE		0x1000
 #define  DART_T8110_TCR_BYPASS_DAPF		BIT(2)
 #define  DART_T8110_TCR_BYPASS_DART		BIT(1)
 #define  DART_T8110_TCR_TRANSLATE_ENABLE	BIT(0)
-#define DART_T8110_TTBR(sid)	(0x1400 + 4 * (sid))
+#define DART_T8110_TTBR_BASE		0x1400
+#define  DART_T8110_TTBR_VALID			BIT(0)
+
+#define DART_SID_ENABLE(priv, idx) \
+	((priv)->sid_enable_base + 4 * (idx))
+#define DART_TCR(priv, sid)	((priv)->tcr_base + 4 * (sid))
+#define DART_TTBR(priv, sid, idx)	\
+	((priv)->ttbr_base + 4 * (priv)->nttbr * (sid) + 4 * (idx))
+#define  DART_TTBR_SHIFT	12
+
+#define DART_ALL_STREAMS(priv)	((1U << (priv)->nsid) - 1)
+
+#define DART_PAGE_SIZE		SZ_16K
+#define DART_PAGE_MASK		(DART_PAGE_SIZE - 1)
+
+#define DART_L1_TABLE		0x3
+#define DART_L2_INVAL		0
+#define DART_L2_VALID		BIT(0)
+#define DART_L2_FULL_PAGE	BIT(1)
+#define DART_L2_START(addr)	((((addr) & DART_PAGE_MASK) >> 2) << 52)
+#define DART_L2_END(addr)	((((addr) & DART_PAGE_MASK) >> 2) << 40)
+
+struct apple_dart_priv {
+	void *base;
+	struct lmb lmb;
+	u64 *l1, *l2;
+	int bypass, shift;
+
+	dma_addr_t dvabase;
+	dma_addr_t dvaend;
+
+	int nsid;
+	int nttbr;
+	int sid_enable_base;
+	int tcr_base;
+	u32 tcr_translate_enable;
+	u32 tcr_bypass;
+	int ttbr_base;
+	u32 ttbr_valid;
+	void (*flush_tlb)(struct apple_dart_priv *priv);
+};
+
+static void apple_dart_t8020_flush_tlb(struct apple_dart_priv *priv)
+{
+	dsb();
+
+	writel(DART_ALL_STREAMS(priv), priv->base + DART_T8020_TLB_SIDMASK);
+	writel(DART_T8020_TLB_CMD_FLUSH, priv->base + DART_T8020_TLB_CMD);
+	while (readl(priv->base + DART_T8020_TLB_CMD) &
+	       DART_T8020_TLB_CMD_BUSY)
+		continue;
+}
+
+static void apple_dart_t8110_flush_tlb(struct apple_dart_priv *priv)
+{
+	dsb();
+
+	writel(DART_T8110_TLB_CMD_FLUSH_ALL,
+	       priv->base + DART_T8110_TLB_CMD_FLUSH_ALL);
+	while (readl(priv->base + DART_T8110_TLB_CMD) &
+	       DART_T8110_TLB_CMD_BUSY)
+		continue;
+}
+
+static dma_addr_t apple_dart_map(struct udevice *dev, void *addr, size_t size)
+{
+	struct apple_dart_priv *priv = dev_get_priv(dev);
+	phys_addr_t paddr, dva;
+	phys_size_t psize, off;
+	int i, idx;
+
+	if (priv->bypass)
+		return (phys_addr_t)addr;
+
+	paddr = ALIGN_DOWN((phys_addr_t)addr, DART_PAGE_SIZE);
+	off = (phys_addr_t)addr - paddr;
+	psize = ALIGN(size + off, DART_PAGE_SIZE);
+
+	dva = lmb_alloc(&priv->lmb, psize, DART_PAGE_SIZE);
+
+	idx = dva / DART_PAGE_SIZE;
+	for (i = 0; i < psize / DART_PAGE_SIZE; i++) {
+		priv->l2[idx + i] = (paddr  >> priv->shift) | DART_L2_VALID |
+			DART_L2_START(0LL) | DART_L2_END(~0LL);
+		paddr += DART_PAGE_SIZE;
+	}
+	flush_dcache_range((unsigned long)&priv->l2[idx],
+			   (unsigned long)&priv->l2[idx + i]);
+	priv->flush_tlb(priv);
+
+	return dva + off;
+}
+
+static void apple_dart_unmap(struct udevice *dev, dma_addr_t addr, size_t size)
+{
+	struct apple_dart_priv *priv = dev_get_priv(dev);
+	phys_addr_t dva;
+	phys_size_t psize;
+	int i, idx;
+
+	if (priv->bypass)
+		return;
+
+	dva = ALIGN_DOWN(addr, DART_PAGE_SIZE);
+	psize = size + (addr - dva);
+	psize = ALIGN(psize, DART_PAGE_SIZE);
+
+	idx = dva / DART_PAGE_SIZE;
+	for (i = 0; i < psize / DART_PAGE_SIZE; i++)
+		priv->l2[idx + i] = DART_L2_INVAL;
+	flush_dcache_range((unsigned long)&priv->l2[idx],
+			   (unsigned long)&priv->l2[idx + i]);
+	priv->flush_tlb(priv);
+
+	lmb_free(&priv->lmb, dva, psize);
+}
+
+static struct iommu_ops apple_dart_ops = {
+	.map = apple_dart_map,
+	.unmap = apple_dart_unmap,
+};
 
 static int apple_dart_probe(struct udevice *dev)
 {
-	void *base;
+	struct apple_dart_priv *priv = dev_get_priv(dev);
+	dma_addr_t addr;
+	phys_addr_t l2;
+	int ntte, nl1, nl2;
 	int sid, i;
+	u32 params2, params4;
 
-	base = dev_read_addr_ptr(dev);
-	if (!base)
+	priv->base = dev_read_addr_ptr(dev);
+	if (!priv->base)
 		return -EINVAL;
 
-	u32 params2 = readl(base + DART_PARAMS2);
-	if (!(params2 & DART_PARAMS2_BYPASS_SUPPORT))
-		return 0;
+	if (device_is_compatible(dev, "apple,t8110-dart")) {
+		params4 = readl(priv->base + DART_T8110_PARAMS4);
+		priv->nsid = params4 & DART_T8110_PARAMS4_NSID_MASK;
+		priv->nttbr = 1;
+		priv->sid_enable_base = DART_T8110_SID_ENABLE_BASE;
+		priv->tcr_base = DART_T8110_TCR_BASE;
+		priv->tcr_translate_enable = DART_T8110_TCR_TRANSLATE_ENABLE;
+		priv->tcr_bypass =
+		    DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART;
+		priv->ttbr_base = DART_T8110_TTBR_BASE;
+		priv->ttbr_valid = DART_T8110_TTBR_VALID;
+		priv->flush_tlb = apple_dart_t8110_flush_tlb;
+	} else {
+		priv->nsid = 16;
+		priv->nttbr = 4;
+		priv->sid_enable_base = DART_T8020_SID_ENABLE;
+		priv->tcr_base = DART_T8020_TCR_BASE;
+		priv->tcr_translate_enable = DART_T8020_TCR_TRANSLATE_ENABLE;
+		priv->tcr_bypass =
+		    DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART;
+		priv->ttbr_base = DART_T8020_TTBR_BASE;
+		priv->ttbr_valid = DART_T8020_TTBR_VALID;
+		priv->flush_tlb = apple_dart_t8020_flush_tlb;
+	}
+
+	if (device_is_compatible(dev, "apple,t6000-dart") ||
+	    device_is_compatible(dev, "apple,t8110-dart"))
+		priv->shift = 4;
 
-	if (device_is_compatible(dev, "apple,t8112-dart")) {
-		for (sid = 0; sid < 256; sid++) {
-			writel(DART_T8110_TCR_BYPASS_DART | DART_T8110_TCR_BYPASS_DAPF,
-			       base + DART_T8110_TCR(sid));
-			writel(0, base + DART_T8110_TTBR(sid));
+	priv->dvabase = DART_PAGE_SIZE;
+	priv->dvaend = SZ_4G - DART_PAGE_SIZE;
+
+	lmb_init(&priv->lmb);
+	lmb_add(&priv->lmb, priv->dvabase, priv->dvaend - priv->dvabase);
+
+	/* Disable translations. */
+	for (sid = 0; sid < priv->nsid; sid++)
+		writel(0, priv->base + DART_TCR(priv, sid));
+
+	/* Remove page tables. */
+	for (sid = 0; sid < priv->nsid; sid++) {
+		for (i = 0; i < priv->nttbr; i++)
+			writel(0, priv->base + DART_TTBR(priv, sid, i));
+	}
+	priv->flush_tlb(priv);
+
+	params2 = readl(priv->base + DART_PARAMS2);
+	if (params2 & DART_PARAMS2_BYPASS_SUPPORT) {
+		for (sid = 0; sid < priv->nsid; sid++) {
+			writel(priv->tcr_bypass,
+			       priv->base + DART_TCR(priv, sid));
 		}
-	} else {
-		for (sid = 0; sid < 16; sid++) {
-			writel(DART_TCR_BYPASS_DART | DART_TCR_BYPASS_DAPF,
-			       base + DART_TCR(sid));
-			for (i = 0; i < 4; i++)
-				writel(0, base + DART_TTBR(sid, i));
+		priv->bypass = 1;
+		return 0;
+	}
+
+	ntte = DIV_ROUND_UP(priv->dvaend, DART_PAGE_SIZE);
+	nl2 = DIV_ROUND_UP(ntte, DART_PAGE_SIZE / sizeof(u64));
+	nl1 = DIV_ROUND_UP(nl2, DART_PAGE_SIZE / sizeof(u64));
+
+	priv->l2 = memalign(DART_PAGE_SIZE, nl2 * DART_PAGE_SIZE);
+	memset(priv->l2, 0, nl2 * DART_PAGE_SIZE);
+	flush_dcache_range((unsigned long)priv->l2,
+			   (unsigned long)priv->l2 + nl2 * DART_PAGE_SIZE);
+
+	priv->l1 = memalign(DART_PAGE_SIZE, nl1 * DART_PAGE_SIZE);
+	memset(priv->l1, 0, nl1 * DART_PAGE_SIZE);
+	l2 = (phys_addr_t)priv->l2;
+	for (i = 0; i < nl2; i++) {
+		priv->l1[i] = (l2 >> priv->shift) | DART_L1_TABLE;
+		l2 += DART_PAGE_SIZE;
+	}
+	flush_dcache_range((unsigned long)priv->l1,
+			   (unsigned long)priv->l1 + nl1 * DART_PAGE_SIZE);
+
+	/* Install page tables. */
+	for (sid = 0; sid < priv->nsid; sid++) {
+		addr = (phys_addr_t)priv->l1;
+		for (i = 0; i < nl1; i++) {
+			writel(addr >> DART_TTBR_SHIFT | priv->ttbr_valid,
+			       priv->base + DART_TTBR(priv, sid, i));
+			addr += DART_PAGE_SIZE;
 		}
 	}
+	priv->flush_tlb(priv);
+
+	/* Enable all streams. */
+	for (i = 0; i < priv->nsid / 32; i++)
+		writel(~0, priv->base + DART_SID_ENABLE(priv, i));
+
+	/* Enable translations. */
+	for (sid = 0; sid < priv->nsid; sid++) {
+		writel(priv->tcr_translate_enable,
+		       priv->base + DART_TCR(priv, sid));
+	}
+
+	return 0;
+}
+
+static int apple_dart_remove(struct udevice *dev)
+{
+	struct apple_dart_priv *priv = dev_get_priv(dev);
+	int sid, i;
+
+	/* Disable translations. */
+	for (sid = 0; sid < priv->nsid; sid++)
+		writel(0, priv->base + DART_TCR(priv, sid));
+
+	/* Remove page tables. */
+	for (sid = 0; sid < priv->nsid; sid++) {
+		for (i = 0; i < priv->nttbr; i++)
+			writel(0, priv->base + DART_TTBR(priv, sid, i));
+	}
+	priv->flush_tlb(priv);
 
 	return 0;
 }
@@ -64,7 +303,7 @@ static int apple_dart_probe(struct udevice *dev)
 static const struct udevice_id apple_dart_ids[] = {
 	{ .compatible = "apple,t8103-dart" },
 	{ .compatible = "apple,t6000-dart" },
-	{ .compatible = "apple,t8112-dart" },
+	{ .compatible = "apple,t8110-dart" },
 	{ /* sentinel */ }
 };
 
@@ -72,5 +311,9 @@ U_BOOT_DRIVER(apple_dart) = {
 	.name = "apple_dart",
 	.id = UCLASS_IOMMU,
 	.of_match = apple_dart_ids,
-	.probe = apple_dart_probe
+	.priv_auto = sizeof(struct apple_dart_priv),
+	.ops = &apple_dart_ops,
+	.probe = apple_dart_probe,
+	.remove = apple_dart_remove,
+	.flags	= DM_FLAG_OS_PREPARE
 };
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/7] usb: xhci: Implement DMA mapping
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
  2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
  2023-01-17 22:03 ` [PATCH 2/7] iommu: apple: Implement DMA mapping operations for Apple DART Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
  2023-01-17 22:51   ` Marek Vasut
  2023-01-17 22:04 ` [PATCH 4/7] iommu: Implement mapping IOMMUs for PCI devices Mark Kettenis
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

An XHCI controller that sits behind an IOMMU needs to map and unmap
its memory buffers to do DMA.  Implement this by introducing new
xhci_dma_map() and xhci_dma_unmap() helper functions.  The
xhci_dma_map() function replaces the existing xhci_virt_to_bus()
function in the sense that it returns the bus address in the case
of simple address translation in the absence of an IOMMU.  The
xhci_bus_to_virt() function is eliminated by storing the CPU
address of the allocated scratchpad memory in struct xhci_ctrl.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 drivers/usb/host/xhci-mem.c  | 84 +++++++++++++++++++++++-------------
 drivers/usb/host/xhci-ring.c | 76 ++++++++++++++++++++------------
 drivers/usb/host/xhci.c      | 10 +++--
 include/usb/xhci.h           | 27 +++++++++---
 4 files changed, 130 insertions(+), 67 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 21cd03b662..72b7530626 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -64,8 +64,9 @@ void xhci_inval_cache(uintptr_t addr, u32 len)
  * @param ptr	pointer to "segement" to be freed
  * Return: none
  */
-static void xhci_segment_free(struct xhci_segment *seg)
+static void xhci_segment_free(struct xhci_ctrl *ctrl, struct xhci_segment *seg)
 {
+	xhci_dma_unmap(ctrl, seg->dma, SEGMENT_SIZE);
 	free(seg->trbs);
 	seg->trbs = NULL;
 
@@ -78,7 +79,7 @@ static void xhci_segment_free(struct xhci_segment *seg)
  * @param ptr	pointer to "ring" to be freed
  * Return: none
  */
-static void xhci_ring_free(struct xhci_ring *ring)
+static void xhci_ring_free(struct xhci_ctrl *ctrl, struct xhci_ring *ring)
 {
 	struct xhci_segment *seg;
 	struct xhci_segment *first_seg;
@@ -89,10 +90,10 @@ static void xhci_ring_free(struct xhci_ring *ring)
 	seg = first_seg->next;
 	while (seg != first_seg) {
 		struct xhci_segment *next = seg->next;
-		xhci_segment_free(seg);
+		xhci_segment_free(ctrl, seg);
 		seg = next;
 	}
-	xhci_segment_free(first_seg);
+	xhci_segment_free(ctrl, first_seg);
 
 	free(ring);
 }
@@ -105,12 +106,20 @@ static void xhci_ring_free(struct xhci_ring *ring)
  */
 static void xhci_scratchpad_free(struct xhci_ctrl *ctrl)
 {
+	struct xhci_hccr *hccr = ctrl->hccr;
+	int num_sp;
+
 	if (!ctrl->scratchpad)
 		return;
 
+	num_sp = HCS_MAX_SCRATCHPAD(xhci_readl(&hccr->cr_hcsparams2));
+	xhci_dma_unmap(ctrl, ctrl->scratchpad->sp_array[0],
+		       num_sp * ctrl->page_size);
+	xhci_dma_unmap(ctrl, ctrl->dcbaa->dev_context_ptrs[0],
+		       num_sp * sizeof(u64));
 	ctrl->dcbaa->dev_context_ptrs[0] = 0;
 
-	free(xhci_bus_to_virt(ctrl, le64_to_cpu(ctrl->scratchpad->sp_array[0])));
+	free(ctrl->scratchpad->scratchpad);
 	free(ctrl->scratchpad->sp_array);
 	free(ctrl->scratchpad);
 	ctrl->scratchpad = NULL;
@@ -122,8 +131,10 @@ static void xhci_scratchpad_free(struct xhci_ctrl *ctrl)
  * @param ptr	pointer to "xhci_container_ctx" to be freed
  * Return: none
  */
-static void xhci_free_container_ctx(struct xhci_container_ctx *ctx)
+static void xhci_free_container_ctx(struct xhci_ctrl *ctrl,
+				    struct xhci_container_ctx *ctx)
 {
+	xhci_dma_unmap(ctrl, ctx->dma, ctx->size);
 	free(ctx->bytes);
 	free(ctx);
 }
@@ -153,12 +164,12 @@ static void xhci_free_virt_devices(struct xhci_ctrl *ctrl)
 
 		for (i = 0; i < 31; ++i)
 			if (virt_dev->eps[i].ring)
-				xhci_ring_free(virt_dev->eps[i].ring);
+				xhci_ring_free(ctrl, virt_dev->eps[i].ring);
 
 		if (virt_dev->in_ctx)
-			xhci_free_container_ctx(virt_dev->in_ctx);
+			xhci_free_container_ctx(ctrl, virt_dev->in_ctx);
 		if (virt_dev->out_ctx)
-			xhci_free_container_ctx(virt_dev->out_ctx);
+			xhci_free_container_ctx(ctrl, virt_dev->out_ctx);
 
 		free(virt_dev);
 		/* make sure we are pointing to NULL */
@@ -174,11 +185,15 @@ static void xhci_free_virt_devices(struct xhci_ctrl *ctrl)
  */
 void xhci_cleanup(struct xhci_ctrl *ctrl)
 {
-	xhci_ring_free(ctrl->event_ring);
-	xhci_ring_free(ctrl->cmd_ring);
+	xhci_ring_free(ctrl, ctrl->event_ring);
+	xhci_ring_free(ctrl, ctrl->cmd_ring);
 	xhci_scratchpad_free(ctrl);
 	xhci_free_virt_devices(ctrl);
+	xhci_dma_unmap(ctrl, ctrl->erst.erst_dma_addr,
+		       sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS);
 	free(ctrl->erst.entries);
+	xhci_dma_unmap(ctrl, ctrl->dcbaa->dma,
+		       sizeof(struct xhci_device_context_array));
 	free(ctrl->dcbaa);
 	memset(ctrl, '\0', sizeof(struct xhci_ctrl));
 }
@@ -218,15 +233,13 @@ static void xhci_link_segments(struct xhci_ctrl *ctrl, struct xhci_segment *prev
 			       struct xhci_segment *next, bool link_trbs)
 {
 	u32 val;
-	u64 val_64 = 0;
 
 	if (!prev || !next)
 		return;
 	prev->next = next;
 	if (link_trbs) {
-		val_64 = xhci_virt_to_bus(ctrl, next->trbs);
 		prev->trbs[TRBS_PER_SEGMENT-1].link.segment_ptr =
-			cpu_to_le64(val_64);
+			cpu_to_le64(next->dma);
 
 		/*
 		 * Set the last TRB in the segment to
@@ -273,7 +286,7 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring)
  * @param	none
  * Return: pointer to the newly allocated SEGMENT
  */
-static struct xhci_segment *xhci_segment_alloc(void)
+static struct xhci_segment *xhci_segment_alloc(struct xhci_ctrl *ctrl)
 {
 	struct xhci_segment *seg;
 
@@ -281,6 +294,7 @@ static struct xhci_segment *xhci_segment_alloc(void)
 	BUG_ON(!seg);
 
 	seg->trbs = xhci_malloc(SEGMENT_SIZE);
+	seg->dma = xhci_dma_map(ctrl, seg->trbs, SEGMENT_SIZE);
 
 	seg->next = NULL;
 
@@ -314,7 +328,7 @@ struct xhci_ring *xhci_ring_alloc(struct xhci_ctrl *ctrl, unsigned int num_segs,
 	if (num_segs == 0)
 		return ring;
 
-	ring->first_seg = xhci_segment_alloc();
+	ring->first_seg = xhci_segment_alloc(ctrl);
 	BUG_ON(!ring->first_seg);
 
 	num_segs--;
@@ -323,7 +337,7 @@ struct xhci_ring *xhci_ring_alloc(struct xhci_ctrl *ctrl, unsigned int num_segs,
 	while (num_segs > 0) {
 		struct xhci_segment *next;
 
-		next = xhci_segment_alloc();
+		next = xhci_segment_alloc(ctrl);
 		BUG_ON(!next);
 
 		xhci_link_segments(ctrl, prev, next, link_trbs);
@@ -372,7 +386,8 @@ static int xhci_scratchpad_alloc(struct xhci_ctrl *ctrl)
 	if (!scratchpad->sp_array)
 		goto fail_sp2;
 
-	val_64 = xhci_virt_to_bus(ctrl, scratchpad->sp_array);
+	val_64 = xhci_dma_map(ctrl, scratchpad->sp_array,
+			      num_sp * sizeof(u64));
 	ctrl->dcbaa->dev_context_ptrs[0] = cpu_to_le64(val_64);
 
 	xhci_flush_cache((uintptr_t)&ctrl->dcbaa->dev_context_ptrs[0],
@@ -386,16 +401,18 @@ static int xhci_scratchpad_alloc(struct xhci_ctrl *ctrl)
 	}
 	BUG_ON(i == 16);
 
-	page_size = 1 << (i + 12);
-	buf = memalign(page_size, num_sp * page_size);
+	ctrl->page_size = 1 << (i + 12);
+	buf = memalign(ctrl->page_size, num_sp * ctrl->page_size);
 	if (!buf)
 		goto fail_sp3;
-	memset(buf, '\0', num_sp * page_size);
-	xhci_flush_cache((uintptr_t)buf, num_sp * page_size);
+	memset(buf, '\0', num_sp * ctrl->page_size);
+	xhci_flush_cache((uintptr_t)buf, num_sp * ctrl->page_size);
 
+	scratchpad->scratchpad = buf;
+	val_64 = xhci_dma_map(ctrl, buf, num_sp * ctrl->page_size);
 	for (i = 0; i < num_sp; i++) {
-		val_64 = xhci_virt_to_bus(ctrl, buf + i * page_size);
 		scratchpad->sp_array[i] = cpu_to_le64(val_64);
+		val_64 += ctrl->page_size;
 	}
 
 	xhci_flush_cache((uintptr_t)scratchpad->sp_array,
@@ -437,6 +454,7 @@ static struct xhci_container_ctx
 		ctx->size += CTX_SIZE(xhci_readl(&ctrl->hccr->cr_hccparams));
 
 	ctx->bytes = xhci_malloc(ctx->size);
+	ctx->dma = xhci_dma_map(ctrl, ctx->bytes, ctx->size);
 
 	return ctx;
 }
@@ -487,7 +505,7 @@ int xhci_alloc_virt_device(struct xhci_ctrl *ctrl, unsigned int slot_id)
 	/* Allocate endpoint 0 ring */
 	virt_dev->eps[0].ring = xhci_ring_alloc(ctrl, 1, true);
 
-	byte_64 = xhci_virt_to_bus(ctrl, virt_dev->out_ctx->bytes);
+	byte_64 = virt_dev->out_ctx->dma;
 
 	/* Point to output device context in dcbaa. */
 	ctrl->dcbaa->dev_context_ptrs[slot_id] = cpu_to_le64(byte_64);
@@ -523,15 +541,16 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
 		return -ENOMEM;
 	}
 
-	val_64 = xhci_virt_to_bus(ctrl, ctrl->dcbaa);
+	ctrl->dcbaa->dma = xhci_dma_map(ctrl, ctrl->dcbaa,
+				sizeof(struct xhci_device_context_array));
 	/* Set the pointer in DCBAA register */
-	xhci_writeq(&hcor->or_dcbaap, val_64);
+	xhci_writeq(&hcor->or_dcbaap, ctrl->dcbaa->dma);
 
 	/* Command ring control pointer register initialization */
 	ctrl->cmd_ring = xhci_ring_alloc(ctrl, 1, true);
 
 	/* Set the address in the Command Ring Control register */
-	trb_64 = xhci_virt_to_bus(ctrl, ctrl->cmd_ring->first_seg->trbs);
+	trb_64 = ctrl->cmd_ring->first_seg->dma;
 	val_64 = xhci_readq(&hcor->or_crcr);
 	val_64 = (val_64 & (u64) CMD_RING_RSVD_BITS) |
 		(trb_64 & (u64) ~CMD_RING_RSVD_BITS) |
@@ -555,6 +574,8 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
 	ctrl->event_ring = xhci_ring_alloc(ctrl, ERST_NUM_SEGS, false);
 	ctrl->erst.entries = xhci_malloc(sizeof(struct xhci_erst_entry) *
 					 ERST_NUM_SEGS);
+	ctrl->erst.erst_dma_addr = xhci_dma_map(ctrl, ctrl->erst.entries,
+			sizeof(struct xhci_erst_entry) * ERST_NUM_SEGS);
 
 	ctrl->erst.num_entries = ERST_NUM_SEGS;
 
@@ -562,7 +583,7 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
 			val < ERST_NUM_SEGS;
 			val++) {
 		struct xhci_erst_entry *entry = &ctrl->erst.entries[val];
-		trb_64 = xhci_virt_to_bus(ctrl, seg->trbs);
+		trb_64 = seg->dma;
 		entry->seg_addr = cpu_to_le64(trb_64);
 		entry->seg_size = cpu_to_le32(TRBS_PER_SEGMENT);
 		entry->rsvd = 0;
@@ -571,7 +592,8 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
 	xhci_flush_cache((uintptr_t)ctrl->erst.entries,
 			 ERST_NUM_SEGS * sizeof(struct xhci_erst_entry));
 
-	deq = xhci_virt_to_bus(ctrl, ctrl->event_ring->dequeue);
+	deq = xhci_trb_virt_to_dma(ctrl->event_ring->deq_seg,
+				   ctrl->event_ring->dequeue);
 
 	/* Update HC event ring dequeue pointer */
 	xhci_writeq(&ctrl->ir_set->erst_dequeue,
@@ -586,7 +608,7 @@ int xhci_mem_init(struct xhci_ctrl *ctrl, struct xhci_hccr *hccr,
 	/* this is the event ring segment table pointer */
 	val_64 = xhci_readq(&ctrl->ir_set->erst_base);
 	val_64 &= ERST_PTR_MASK;
-	val_64 |= xhci_virt_to_bus(ctrl, ctrl->erst.entries) & ~ERST_PTR_MASK;
+	val_64 |= ctrl->erst.erst_dma_addr & ~ERST_PTR_MASK;
 
 	xhci_writeq(&ctrl->ir_set->erst_base, val_64);
 
@@ -849,7 +871,7 @@ void xhci_setup_addressable_virt_dev(struct xhci_ctrl *ctrl,
 	/* EP 0 can handle "burst" sizes of 1, so Max Burst Size field is 0 */
 	ep0_ctx->ep_info2 |= cpu_to_le32(MAX_BURST(0) | ERROR_COUNT(3));
 
-	trb_64 = xhci_virt_to_bus(ctrl, virt_dev->eps[0].ring->first_seg->trbs);
+	trb_64 = virt_dev->eps[0].ring->first_seg->dma;
 	ep0_ctx->deq = cpu_to_le64(trb_64 | virt_dev->eps[0].ring->cycle_state);
 
 	/*
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index eb6dfcdb09..c8260cbdf9 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -24,6 +24,24 @@
 
 #include <usb/xhci.h>
 
+/*
+ * Returns zero if the TRB isn't in this segment, otherwise it returns the DMA
+ * address of the TRB.
+ */
+dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg,
+				union xhci_trb *trb)
+{
+	unsigned long segment_offset;
+
+	if (!seg || !trb || trb < seg->trbs)
+		return 0;
+	/* offset in TRBs */
+	segment_offset = trb - seg->trbs;
+	if (segment_offset >= TRBS_PER_SEGMENT)
+		return 0;
+	return seg->dma + (segment_offset * sizeof(*trb));
+}
+
 /**
  * Is this TRB a link TRB or was the last TRB the last TRB in this event ring
  * segment?  I.e. would the updated event TRB pointer step off the end of the
@@ -180,10 +198,8 @@ static void inc_deq(struct xhci_ctrl *ctrl, struct xhci_ring *ring)
  * @param trb_fields	pointer to trb field array containing TRB contents
  * Return: pointer to the enqueued trb
  */
-static struct xhci_generic_trb *queue_trb(struct xhci_ctrl *ctrl,
-					  struct xhci_ring *ring,
-					  bool more_trbs_coming,
-					  unsigned int *trb_fields)
+static dma_addr_t queue_trb(struct xhci_ctrl *ctrl, struct xhci_ring *ring,
+			    bool more_trbs_coming, unsigned int *trb_fields)
 {
 	struct xhci_generic_trb *trb;
 	int i;
@@ -197,7 +213,7 @@ static struct xhci_generic_trb *queue_trb(struct xhci_ctrl *ctrl,
 
 	inc_enq(ctrl, ring, more_trbs_coming);
 
-	return trb;
+	return xhci_trb_virt_to_dma(ring->enq_seg, (union xhci_trb *)trb);
 }
 
 /**
@@ -271,19 +287,15 @@ static int prepare_ring(struct xhci_ctrl *ctrl, struct xhci_ring *ep_ring,
  * @param cmd		Command type to enqueue
  * Return: none
  */
-void xhci_queue_command(struct xhci_ctrl *ctrl, u8 *ptr, u32 slot_id,
+void xhci_queue_command(struct xhci_ctrl *ctrl, dma_addr_t addr, u32 slot_id,
 			u32 ep_index, trb_type cmd)
 {
 	u32 fields[4];
-	u64 val_64 = 0;
 
 	BUG_ON(prepare_ring(ctrl, ctrl->cmd_ring, EP_STATE_RUNNING));
 
-	if (ptr)
-		val_64 = xhci_virt_to_bus(ctrl, ptr);
-
-	fields[0] = lower_32_bits(val_64);
-	fields[1] = upper_32_bits(val_64);
+	fields[0] = lower_32_bits(addr);
+	fields[1] = upper_32_bits(addr);
 	fields[2] = 0;
 	fields[3] = TRB_TYPE(cmd) | SLOT_ID_FOR_TRB(slot_id) |
 		    ctrl->cmd_ring->cycle_state;
@@ -399,12 +411,15 @@ static void giveback_first_trb(struct usb_device *udev, int ep_index,
  */
 void xhci_acknowledge_event(struct xhci_ctrl *ctrl)
 {
+	dma_addr_t deq;
+
 	/* Advance our dequeue pointer to the next event */
 	inc_deq(ctrl, ctrl->event_ring);
 
 	/* Inform the hardware */
-	xhci_writeq(&ctrl->ir_set->erst_dequeue,
-		    xhci_virt_to_bus(ctrl, ctrl->event_ring->dequeue) | ERST_EHB);
+	deq = xhci_trb_virt_to_dma(ctrl->event_ring->deq_seg,
+				   ctrl->event_ring->dequeue);
+	xhci_writeq(&ctrl->ir_set->erst_dequeue, deq | ERST_EHB);
 }
 
 /**
@@ -490,17 +505,19 @@ static void reset_ep(struct usb_device *udev, int ep_index)
 	struct xhci_ctrl *ctrl = xhci_get_ctrl(udev);
 	struct xhci_ring *ring =  ctrl->devs[udev->slot_id]->eps[ep_index].ring;
 	union xhci_trb *event;
+	u64 addr;
 	u32 field;
 
 	printf("Resetting EP %d...\n", ep_index);
-	xhci_queue_command(ctrl, NULL, udev->slot_id, ep_index, TRB_RESET_EP);
+	xhci_queue_command(ctrl, 0, udev->slot_id, ep_index, TRB_RESET_EP);
 	event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
 	field = le32_to_cpu(event->trans_event.flags);
 	BUG_ON(TRB_TO_SLOT_ID(field) != udev->slot_id);
 	xhci_acknowledge_event(ctrl);
 
-	xhci_queue_command(ctrl, (void *)((uintptr_t)ring->enqueue |
-		ring->cycle_state), udev->slot_id, ep_index, TRB_SET_DEQ);
+	addr = xhci_trb_virt_to_dma(ring->enq_seg,
+		(void *)((uintptr_t)ring->enqueue | ring->cycle_state));
+	xhci_queue_command(ctrl, addr, udev->slot_id, ep_index, TRB_SET_DEQ);
 	event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
 	BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags))
 		!= udev->slot_id || GET_COMP_CODE(le32_to_cpu(
@@ -521,9 +538,10 @@ static void abort_td(struct usb_device *udev, int ep_index)
 	struct xhci_ctrl *ctrl = xhci_get_ctrl(udev);
 	struct xhci_ring *ring =  ctrl->devs[udev->slot_id]->eps[ep_index].ring;
 	union xhci_trb *event;
+	u64 addr;
 	u32 field;
 
-	xhci_queue_command(ctrl, NULL, udev->slot_id, ep_index, TRB_STOP_RING);
+	xhci_queue_command(ctrl, 0, udev->slot_id, ep_index, TRB_STOP_RING);
 
 	event = xhci_wait_for_event(ctrl, TRB_TRANSFER);
 	field = le32_to_cpu(event->trans_event.flags);
@@ -539,8 +557,9 @@ static void abort_td(struct usb_device *udev, int ep_index)
 		event->event_cmd.status)) != COMP_SUCCESS);
 	xhci_acknowledge_event(ctrl);
 
-	xhci_queue_command(ctrl, (void *)((uintptr_t)ring->enqueue |
-		ring->cycle_state), udev->slot_id, ep_index, TRB_SET_DEQ);
+	addr = xhci_trb_virt_to_dma(ring->enq_seg,
+		(void *)((uintptr_t)ring->enqueue | ring->cycle_state));
+	xhci_queue_command(ctrl, addr, udev->slot_id, ep_index, TRB_SET_DEQ);
 	event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
 	BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags))
 		!= udev->slot_id || GET_COMP_CODE(le32_to_cpu(
@@ -609,8 +628,8 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
 	u64 addr;
 	int ret;
 	u32 trb_fields[4];
-	u64 val_64 = xhci_virt_to_bus(ctrl, buffer);
-	void *last_transfer_trb_addr;
+	u64 buf_64 = xhci_dma_map(ctrl, buffer, length);
+	dma_addr_t last_transfer_trb_addr;
 	int available_length;
 
 	debug("dev=%p, pipe=%lx, buffer=%p, length=%d\n",
@@ -633,7 +652,7 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
 	 * we send request in more than 1 TRB by chaining them.
 	 */
 	running_total = TRB_MAX_BUFF_SIZE -
-			(lower_32_bits(val_64) & (TRB_MAX_BUFF_SIZE - 1));
+			(lower_32_bits(buf_64) & (TRB_MAX_BUFF_SIZE - 1));
 	trb_buff_len = running_total;
 	running_total &= TRB_MAX_BUFF_SIZE - 1;
 
@@ -678,7 +697,7 @@ int xhci_bulk_tx(struct usb_device *udev, unsigned long pipe,
 	 * that the buffer should not span 64KB boundary. if so
 	 * we send request in more than 1 TRB by chaining them.
 	 */
-	addr = val_64;
+	addr = buf_64;
 
 	if (trb_buff_len > length)
 		trb_buff_len = length;
@@ -754,7 +773,7 @@ again:
 	}
 
 	if ((uintptr_t)(le64_to_cpu(event->trans_event.buffer)) !=
-	    (uintptr_t)xhci_virt_to_bus(ctrl, last_transfer_trb_addr)) {
+	    (uintptr_t)last_transfer_trb_addr) {
 		available_length -=
 			(int)EVENT_TRB_LEN(le32_to_cpu(event->trans_event.transfer_len));
 		xhci_acknowledge_event(ctrl);
@@ -768,6 +787,7 @@ again:
 	record_transfer_result(udev, event, available_length);
 	xhci_acknowledge_event(ctrl);
 	xhci_inval_cache((uintptr_t)buffer, length);
+	xhci_dma_unmap(ctrl, buf_64, length);
 
 	return (udev->status != USB_ST_NOT_PROC) ? 0 : -1;
 }
@@ -911,7 +931,7 @@ int xhci_ctrl_tx(struct usb_device *udev, unsigned long pipe,
 	if (length > 0) {
 		if (req->requesttype & USB_DIR_IN)
 			field |= TRB_DIR_IN;
-		buf_64 = xhci_virt_to_bus(ctrl, buffer);
+		buf_64 = xhci_dma_map(ctrl, buffer, length);
 
 		trb_fields[0] = lower_32_bits(buf_64);
 		trb_fields[1] = upper_32_bits(buf_64);
@@ -961,8 +981,10 @@ int xhci_ctrl_tx(struct usb_device *udev, unsigned long pipe,
 	}
 
 	/* Invalidate buffer to make it available to usb-core */
-	if (length > 0)
+	if (length > 0) {
 		xhci_inval_cache((uintptr_t)buffer, length);
+		xhci_dma_unmap(ctrl, buf_64, length);
+	}
 
 	if (GET_COMP_CODE(le32_to_cpu(event->trans_event.transfer_len))
 			== COMP_SHORT_TX) {
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index dbeb88afe3..440b0224b1 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -448,7 +448,7 @@ static int xhci_configure_endpoints(struct usb_device *udev, bool ctx_change)
 	in_ctx = virt_dev->in_ctx;
 
 	xhci_flush_cache((uintptr_t)in_ctx->bytes, in_ctx->size);
-	xhci_queue_command(ctrl, in_ctx->bytes, udev->slot_id, 0,
+	xhci_queue_command(ctrl, in_ctx->dma, udev->slot_id, 0,
 			   ctx_change ? TRB_EVAL_CONTEXT : TRB_CONFIG_EP);
 	event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
 	BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags))
@@ -585,7 +585,8 @@ static int xhci_set_configuration(struct usb_device *udev)
 			cpu_to_le32(MAX_BURST(max_burst) |
 			ERROR_COUNT(err_count));
 
-		trb_64 = xhci_virt_to_bus(ctrl, virt_dev->eps[ep_index].ring->enqueue);
+		trb_64 = xhci_trb_virt_to_dma(virt_dev->eps[ep_index].ring->enq_seg,
+				virt_dev->eps[ep_index].ring->enqueue);
 		ep_ctx[ep_index]->deq = cpu_to_le64(trb_64 |
 				virt_dev->eps[ep_index].ring->cycle_state);
 
@@ -643,7 +644,8 @@ static int xhci_address_device(struct usb_device *udev, int root_portnr)
 	ctrl_ctx->add_flags = cpu_to_le32(SLOT_FLAG | EP0_FLAG);
 	ctrl_ctx->drop_flags = 0;
 
-	xhci_queue_command(ctrl, (void *)ctrl_ctx, slot_id, 0, TRB_ADDR_DEV);
+	xhci_queue_command(ctrl, virt_dev->in_ctx->dma,
+			   slot_id, 0, TRB_ADDR_DEV);
 	event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
 	BUG_ON(TRB_TO_SLOT_ID(le32_to_cpu(event->event_cmd.flags)) != slot_id);
 
@@ -718,7 +720,7 @@ static int _xhci_alloc_device(struct usb_device *udev)
 		return 0;
 	}
 
-	xhci_queue_command(ctrl, NULL, 0, 0, TRB_ENABLE_SLOT);
+	xhci_queue_command(ctrl, 0, 0, 0, TRB_ENABLE_SLOT);
 	event = xhci_wait_for_event(ctrl, TRB_COMPLETION);
 	BUG_ON(GET_COMP_CODE(le32_to_cpu(event->event_cmd.status))
 		!= COMP_SUCCESS);
diff --git a/include/usb/xhci.h b/include/usb/xhci.h
index ea4cf3f52b..85c359fa1b 100644
--- a/include/usb/xhci.h
+++ b/include/usb/xhci.h
@@ -16,6 +16,7 @@
 #ifndef HOST_XHCI_H_
 #define HOST_XHCI_H_
 
+#include <iommu.h>
 #include <phys2bus.h>
 #include <asm/types.h>
 #include <asm/cache.h>
@@ -490,6 +491,7 @@ struct xhci_container_ctx {
 
 	int size;
 	u8 *bytes;
+	dma_addr_t dma;
 };
 
 /**
@@ -688,6 +690,8 @@ struct xhci_input_control_ctx {
 struct xhci_device_context_array {
 	/* 64-bit device addresses; we only write 32-bit addresses */
 	__le64			dev_context_ptrs[MAX_HC_SLOTS];
+	/* private xHCD pointers */
+	dma_addr_t	dma;
 };
 /* TODO: write function to set the 64-bit device DMA address */
 /*
@@ -997,6 +1001,7 @@ struct xhci_segment {
 	union xhci_trb		*trbs;
 	/* private to HCD */
 	struct xhci_segment	*next;
+	dma_addr_t		dma;
 };
 
 struct xhci_ring {
@@ -1025,11 +1030,14 @@ struct xhci_erst_entry {
 struct xhci_erst {
 	struct xhci_erst_entry	*entries;
 	unsigned int		num_entries;
+	/* xhci->event_ring keeps track of segment dma addresses */
+	dma_addr_t		erst_dma_addr;
 	/* Num entries the ERST can contain */
 	unsigned int		erst_size;
 };
 
 struct xhci_scratchpad {
+	void *scratchpad;
 	u64 *sp_array;
 };
 
@@ -1216,6 +1224,7 @@ struct xhci_ctrl {
 	struct xhci_virt_device *devs[MAX_HC_SLOTS];
 	int rootdev;
 	u16 hci_version;
+	int page_size;
 	u32 quirks;
 #define XHCI_MTK_HOST		BIT(0)
 };
@@ -1226,7 +1235,7 @@ struct xhci_ctrl {
 #define xhci_to_dev(_ctrl)	NULL
 #endif
 
-unsigned long trb_addr(struct xhci_segment *seg, union xhci_trb *trb);
+dma_addr_t xhci_trb_virt_to_dma(struct xhci_segment *seg, union xhci_trb *trb);
 struct xhci_input_control_ctx
 		*xhci_get_input_control_ctx(struct xhci_container_ctx *ctx);
 struct xhci_slot_ctx *xhci_get_slot_ctx(struct xhci_ctrl *ctrl,
@@ -1243,7 +1252,7 @@ void xhci_slot_copy(struct xhci_ctrl *ctrl,
 		    struct xhci_container_ctx *out_ctx);
 void xhci_setup_addressable_virt_dev(struct xhci_ctrl *ctrl,
 				     struct usb_device *udev, int hop_portnr);
-void xhci_queue_command(struct xhci_ctrl *ctrl, u8 *ptr,
+void xhci_queue_command(struct xhci_ctrl *ctrl, dma_addr_t addr,
 			u32 slot_id, u32 ep_index, trb_type cmd);
 void xhci_acknowledge_event(struct xhci_ctrl *ctrl);
 union xhci_trb *xhci_wait_for_event(struct xhci_ctrl *ctrl, trb_type expected);
@@ -1284,14 +1293,22 @@ extern struct dm_usb_ops xhci_usb_ops;
 
 struct xhci_ctrl *xhci_get_ctrl(struct usb_device *udev);
 
+/**
+ * xhci_dma_map() - obtain the DMA address for a CPU buffer
+ *
+ * With CONFIG_IS_ENABLED(IOMMU) the result comes from dev_iommu_dma_map()
+ * on the controller's device; otherwise it is the bus address computed by
+ * dev_phys_to_bus() from the physical address of @addr.
+ *
+ * @ctrl:	host controller
+ * @addr:	CPU address of the buffer
+ * @size:	buffer size, passed on to dev_iommu_dma_map() (unused in the
+ *		non-IOMMU case)
+ * Return: address to program into the controller
+ */
-static inline dma_addr_t xhci_virt_to_bus(struct xhci_ctrl *ctrl, void *addr)
+static inline dma_addr_t xhci_dma_map(struct xhci_ctrl *ctrl, void *addr,
+				      size_t size)
 {
+#if CONFIG_IS_ENABLED(IOMMU)
+	return dev_iommu_dma_map(xhci_to_dev(ctrl), addr, size);
+#else
 	return dev_phys_to_bus(xhci_to_dev(ctrl), virt_to_phys(addr));
+#endif
 }
 
+/**
+ * xhci_dma_unmap() - counterpart of xhci_dma_map()
+ *
+ * Calls dev_iommu_dma_unmap() on the controller's device when
+ * CONFIG_IS_ENABLED(IOMMU); does nothing otherwise.
+ *
+ * @ctrl:	host controller
+ * @addr:	DMA address previously returned by xhci_dma_map()
+ * @size:	size that was passed to xhci_dma_map()
+ */
-static inline void *xhci_bus_to_virt(struct xhci_ctrl *ctrl, dma_addr_t addr)
+static inline void xhci_dma_unmap(struct xhci_ctrl *ctrl, dma_addr_t addr,
+				  size_t size)
 {
-	return phys_to_virt(dev_bus_to_phys(xhci_to_dev(ctrl), addr));
+#if CONFIG_IS_ENABLED(IOMMU)
+	dev_iommu_dma_unmap(xhci_to_dev(ctrl), addr, size);
+#endif
 }
 
 #endif /* HOST_XHCI_H_ */
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/7] iommu: Implement mapping IOMMUs for PCI devices
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
                   ` (2 preceding siblings ...)
  2023-01-17 22:04 ` [PATCH 3/7] usb: xhci: Implement DMA mapping Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
  2023-01-17 22:04 ` [PATCH 5/7] pci: Add Apple PCIe controller driver Mark Kettenis
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

Systems such as Apple's M1 and M2 SoCs may have separate IOMMUs
for each PCIe root port.  In this case the right IOMMU for a
PCI device behind a particular root port is described by an
"iommu-map" property in the device tree.  Parse this property
and use it to find the right IOMMU device for PCI devices.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 drivers/iommu/iommu-uclass.c | 65 ++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/drivers/iommu/iommu-uclass.c b/drivers/iommu/iommu-uclass.c
index f6b1457736..72f123df55 100644
--- a/drivers/iommu/iommu-uclass.c
+++ b/drivers/iommu/iommu-uclass.c
@@ -8,10 +8,71 @@
 #include <common.h>
 #include <dm.h>
 #include <iommu.h>
+#include <malloc.h>
 #include <phys2bus.h>
 #include <asm/io.h>
 
 #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA))
+
+#if CONFIG_IS_ENABLED(PCI)
+/**
+ * dev_pci_iommu_enable() - find the IOMMU of a PCI device via "iommu-map"
+ *
+ * Walks up from @dev to the closest ancestor carrying an "iommu-map"
+ * property, derives the requester ID of @dev from its bus/device/function
+ * number (masked with "iommu-map-mask" when present) and searches the map
+ * for an entry whose range covers that ID.  On a match the IOMMU device
+ * referenced by the entry's phandle is obtained with
+ * uclass_get_device_by_phandle_id() and stored in @dev->iommu.
+ *
+ * @dev:	PCI device to find an IOMMU for
+ * Return: 0 on success or when there is no usable map or matching entry,
+ *	   -ENOMEM if the temporary copy of the map cannot be allocated, or
+ *	   the error returned by the IOMMU device lookup
+ */
+static int dev_pci_iommu_enable(struct udevice *dev)
+{
+	struct udevice *parent = dev->parent;
+	struct udevice *dev_iommu;
+	u32 *iommu_map;
+	u32 iommu_map_mask, length, phandle, rid, rid_base;
+	int i, count, ret;
+	int len = 0;	/* stays 0 if there is no parent to inspect */
+
+	/* Find the closest ancestor that has an "iommu-map" property */
+	while (parent) {
+		len = dev_read_size(parent, "iommu-map");
+		if (len > 0)
+			break;
+		parent = parent->parent;
+	}
+
+	if (len <= 0)
+		return 0;
+
+	iommu_map = malloc(len);
+	if (!iommu_map)
+		return -ENOMEM;
+
+	count = len / sizeof(u32);
+	ret = dev_read_u32_array(parent, "iommu-map", iommu_map, count);
+	if (ret < 0) {
+		/* An unreadable map is treated like a missing one */
+		ret = 0;
+		goto out;
+	}
+
+	iommu_map_mask = dev_read_u32_default(parent, "iommu-map-mask", ~0);
+	rid = (dm_pci_get_bdf(dev) >> 8) & iommu_map_mask;
+
+	/*
+	 * Loop over entries until a mapping is found.  Each entry occupies
+	 * four cells (cell 2 is not used here); only complete entries are
+	 * visited so a truncated property cannot cause a read past the end
+	 * of the buffer.
+	 */
+	for (i = 0; i + 3 < count; i += 4) {
+		rid_base = iommu_map[i];
+		phandle = iommu_map[i + 1];
+		length = iommu_map[i + 3];
+
+		/* Subtract rather than add so the range test cannot wrap */
+		if (rid < rid_base || rid - rid_base >= length)
+			continue;
+
+		ret = uclass_get_device_by_phandle_id(UCLASS_IOMMU, phandle,
+						      &dev_iommu);
+		if (ret) {
+			debug("%s: uclass_get_device_by_phandle_id failed: %d\n",
+			      __func__, ret);
+			goto out;
+		}
+		dev->iommu = dev_iommu;
+		break;
+	}
+
+	ret = 0;
+out:
+	free(iommu_map);
+	return ret;
+}
+#endif
+
 int dev_iommu_enable(struct udevice *dev)
 {
 	struct ofnode_phandle_args args;
@@ -39,6 +100,10 @@ int dev_iommu_enable(struct udevice *dev)
 		dev->iommu = dev_iommu;
 	}
 
+	if (CONFIG_IS_ENABLED(PCI) && count < 0 &&
+	    device_is_on_pci_bus(dev))
+		return dev_pci_iommu_enable(dev);
+
 	return 0;
 }
 #endif
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/7] pci: Add Apple PCIe controller driver
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
                   ` (3 preceding siblings ...)
  2023-01-17 22:04 ` [PATCH 4/7] iommu: Implement mapping IOMMUs for PCI devices Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
  2023-01-17 22:04 ` [PATCH 6/7] arm: apple: Enable PCIe USB controller Mark Kettenis
  2023-01-17 22:04 ` [PATCH 7/7] usb: xhci: Fix root hub descriptor Mark Kettenis
  6 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

This driver supports the PCIe controller on the Apple M1 and
M2 SoCs.  The code is adapted from the Linux driver.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 MAINTAINERS              |   1 +
 arch/arm/Kconfig         |   2 +
 drivers/pci/Kconfig      |   9 +
 drivers/pci/Makefile     |   1 +
 drivers/pci/pcie_apple.c | 354 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 367 insertions(+)
 create mode 100644 drivers/pci/pcie_apple.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 3fc4cd0f12..b8a947f9d3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -122,6 +122,7 @@ F:	arch/arm/mach-apple/
 F:	configs/apple_m1_defconfig
 F:	drivers/iommu/apple_dart.c
 F:	drivers/nvme/nvme_apple.c
+F:	drivers/pci/pcie_apple.c
 F:	drivers/pinctrl/pinctrl-apple.c
 F:	drivers/watchdog/apple_wdt.c
 F:	include/configs/apple.h
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index cac4fa09fd..780815269b 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -963,6 +963,7 @@ config ARCH_APPLE
 	bool "Apple SoCs"
 	select ARM64
 	select CLK
+	select CMD_PCI
 	select CMD_USB
 	select DM
 	select DM_GPIO
@@ -977,6 +978,7 @@ config ARCH_APPLE
 	select LINUX_KERNEL_IMAGE_HEADER
 	select OF_BOARD_SETUP
 	select OF_CONTROL
+	select PCI
 	select PINCTRL
 	select POSITION_INDEPENDENT
 	select POWER_DOMAIN
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 22f4995453..d61596cd7c 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -105,6 +105,15 @@ config PCIE_ECAM_SYNQUACER
 	  Note that this must be configured when boot because Linux driver
 	  expects the PCIe RC has been configured in the bootloader.
 
+config PCIE_APPLE
+	bool "Enable Apple PCIe driver"
+	depends on ARCH_APPLE
+	imply PCI_INIT_R
+	default y
+	help
+	  Say Y here if you want to enable PCIe controller support on
+	  Apple SoCs.
+
 config PCI_GT64120
 	bool "GT64120 PCI support"
 	depends on MIPS
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index dd1ad91ced..d393f1ba03 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_PCI) += pci_auto_common.o pci_common.o
 
 obj-$(CONFIG_PCIE_ECAM_GENERIC) += pcie_ecam_generic.o
 obj-$(CONFIG_PCIE_ECAM_SYNQUACER) += pcie_ecam_synquacer.o
+obj-$(CONFIG_PCIE_APPLE) += pcie_apple.o
 obj-$(CONFIG_PCI_GT64120) += pci_gt64120.o
 obj-$(CONFIG_PCI_MPC85XX) += pci_mpc85xx.o
 obj-$(CONFIG_PCI_MSC01) += pci_msc01.o
diff --git a/drivers/pci/pcie_apple.c b/drivers/pci/pcie_apple.c
new file mode 100644
index 0000000000..9b08e1e5da
--- /dev/null
+++ b/drivers/pci/pcie_apple.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe host bridge driver for Apple system-on-chips.
+ *
+ * The HW is ECAM compliant.
+ *
+ * Initialization requires enabling power and clocks, along with a
+ * number of register pokes.
+ *
+ * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2021 Google LLC
+ * Copyright (C) 2021 Corellium LLC
+ * Copyright (C) 2021 Mark Kettenis <kettenis@openbsd.org>
+ *
+ * Author: Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Author: Marc Zyngier <maz@kernel.org>
+ */
+
+#include <common.h>
+#include <dm.h>
+#include <dm/device_compat.h>
+#include <dm/devres.h>
+#include <mapmem.h>
+#include <pci.h>
+#include <asm/io.h>
+#include <asm-generic/gpio.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+
+/*
+ * Core register offsets and bit fields.  The ones used in this file are
+ * accessed relative to the "base" mapping of struct apple_pcie_priv.
+ * CORE_LANE_CFG() and CORE_LANE_CTL() take the port index.
+ */
+#define CORE_RC_PHYIF_CTL		0x00024
+#define   CORE_RC_PHYIF_CTL_RUN		BIT(0)
+#define CORE_RC_PHYIF_STAT		0x00028
+#define   CORE_RC_PHYIF_STAT_REFCLK	BIT(4)
+#define CORE_RC_CTL			0x00050
+#define   CORE_RC_CTL_RUN		BIT(0)
+#define CORE_RC_STAT			0x00058
+#define   CORE_RC_STAT_READY		BIT(0)
+#define CORE_FABRIC_STAT		0x04000
+#define   CORE_FABRIC_STAT_MASK		0x001F001F
+#define CORE_LANE_CFG(port)		(0x84000 + 0x4000 * (port))
+#define   CORE_LANE_CFG_REFCLK0REQ	BIT(0)
+#define   CORE_LANE_CFG_REFCLK1REQ	BIT(1)
+#define   CORE_LANE_CFG_REFCLK0ACK	BIT(2)
+#define   CORE_LANE_CFG_REFCLK1ACK	BIT(3)
+#define   CORE_LANE_CFG_REFCLKEN	(BIT(9) | BIT(10))
+#define CORE_LANE_CTL(port)		(0x84004 + 0x4000 * (port))
+#define   CORE_LANE_CTL_CFGACC		BIT(15)
+
+/*
+ * Per-port register offsets and bit fields.  The ones used in this file
+ * are accessed relative to the "base" mapping of struct apple_pcie_port.
+ * The PORT_INT_* values without BIT()/mask notation are bit numbers.
+ */
+#define PORT_LTSSMCTL			0x00080
+#define   PORT_LTSSMCTL_START		BIT(0)
+#define PORT_INTSTAT			0x00100
+#define   PORT_INT_TUNNEL_ERR		31
+#define   PORT_INT_CPL_TIMEOUT		23
+#define   PORT_INT_RID2SID_MAPERR	22
+#define   PORT_INT_CPL_ABORT		21
+#define   PORT_INT_MSI_BAD_DATA		19
+#define   PORT_INT_MSI_ERR		18
+#define   PORT_INT_REQADDR_GT32		17
+#define   PORT_INT_AF_TIMEOUT		15
+#define   PORT_INT_LINK_DOWN		14
+#define   PORT_INT_LINK_UP		12
+#define   PORT_INT_LINK_BWMGMT		11
+#define   PORT_INT_AER_MASK		(15 << 4)
+#define   PORT_INT_PORT_ERR		4
+/* Argument is parenthesised so expressions such as (a + b) expand safely */
+#define   PORT_INT_INTx(i)		(i)
+#define   PORT_INT_INTx_MASK		15
+#define PORT_INTMSK			0x00104
+#define PORT_INTMSKSET			0x00108
+#define PORT_INTMSKCLR			0x0010c
+#define PORT_MSICFG			0x00124
+#define   PORT_MSICFG_EN		BIT(0)
+#define   PORT_MSICFG_L2MSINUM_SHIFT	4
+#define PORT_MSIBASE			0x00128
+#define   PORT_MSIBASE_1_SHIFT		16
+#define PORT_MSIADDR			0x00168
+#define PORT_LINKSTS			0x00208
+#define   PORT_LINKSTS_UP		BIT(0)
+#define   PORT_LINKSTS_BUSY		BIT(2)
+#define PORT_LINKCMDSTS			0x00210
+#define PORT_OUTS_NPREQS		0x00284
+#define   PORT_OUTS_NPREQS_REQ		BIT(24)
+#define   PORT_OUTS_NPREQS_CPL		BIT(16)
+#define PORT_RXWR_FIFO			0x00288
+#define   PORT_RXWR_FIFO_HDR		GENMASK(15, 10)
+#define   PORT_RXWR_FIFO_DATA		GENMASK(9, 0)
+#define PORT_RXRD_FIFO			0x0028C
+#define   PORT_RXRD_FIFO_REQ		GENMASK(6, 0)
+#define PORT_OUTS_CPLS			0x00290
+#define   PORT_OUTS_CPLS_SHRD		GENMASK(14, 8)
+#define   PORT_OUTS_CPLS_WAIT		GENMASK(6, 0)
+#define PORT_APPCLK			0x00800
+#define   PORT_APPCLK_EN		BIT(0)
+#define   PORT_APPCLK_CGDIS		BIT(8)
+#define PORT_STATUS			0x00804
+#define   PORT_STATUS_READY		BIT(0)
+#define PORT_REFCLK			0x00810
+#define   PORT_REFCLK_EN		BIT(0)
+#define   PORT_REFCLK_CGDIS		BIT(8)
+#define PORT_PERST			0x00814
+#define   PORT_PERST_OFF		BIT(0)
+#define PORT_RID2SID(i16)		(0x00828 + 4 * (i16))
+#define   PORT_RID2SID_VALID		BIT(31)
+#define   PORT_RID2SID_SID_SHIFT	16
+#define   PORT_RID2SID_BUS_SHIFT	8
+#define   PORT_RID2SID_DEV_SHIFT	3
+#define   PORT_RID2SID_FUNC_SHIFT	0
+#define PORT_OUTS_PREQS_HDR		0x00980
+#define   PORT_OUTS_PREQS_HDR_MASK	GENMASK(9, 0)
+#define PORT_OUTS_PREQS_DATA		0x00984
+#define   PORT_OUTS_PREQS_DATA_MASK	GENMASK(15, 0)
+#define PORT_TUNCTRL			0x00988
+#define   PORT_TUNCTRL_PERST_ON		BIT(0)
+#define   PORT_TUNCTRL_PERST_ACK_REQ	BIT(1)
+#define PORT_TUNSTAT			0x0098c
+#define   PORT_TUNSTAT_PERST_ON		BIT(0)
+#define   PORT_TUNSTAT_PERST_ACK_PEND	BIT(1)
+#define PORT_PREFMEM_ENABLE		0x00994
+
+/**
+ * struct apple_pcie_priv - per-controller driver state
+ * @dev:	the PCIe host bridge device (set in apple_pcie_probe())
+ * @base:	mapping of the device's second "reg" entry; the CORE_*
+ *		registers are accessed relative to it
+ * @cfg_base:	mapping of the device's first "reg" entry; used as the base
+ *		for ECAM configuration space accesses
+ * @ports:	list of struct apple_pcie_port, linked through their @entry
+ */
+struct apple_pcie_priv {
+	struct udevice		*dev;
+	void __iomem            *base;
+	void __iomem            *cfg_base;
+	struct list_head	ports;
+};
+
+/**
+ * struct apple_pcie_port - state of one root port
+ * @pcie:	controller this port belongs to
+ * @reset:	GPIO requested from the port node's "reset-gpios" property
+ * @np:		device tree node of the port
+ * @base:	mapping of the controller's "reg" entry at index @idx + 2; the
+ *		PORT_* registers are accessed relative to it
+ * @entry:	link in the controller's list of ports
+ * @idx:	port index: first "reg" cell of @np shifted right by 11
+ */
+struct apple_pcie_port {
+	struct apple_pcie_priv	*pcie;
+	struct gpio_desc	reset;
+	ofnode			np;
+	void __iomem		*base;
+	struct list_head	entry;
+	int			idx;
+};
+
+/* Read the 32-bit register at @addr, OR in the bits of @set, write it back */
+static void rmw_set(u32 set, void __iomem *addr)
+{
+	u32 val = readl_relaxed(addr);
+
+	val |= set;
+	writel_relaxed(val, addr);
+}
+
+/* Read the 32-bit register at @addr, clear the bits of @clr, write it back */
+static void rmw_clear(u32 clr, void __iomem *addr)
+{
+	u32 val = readl_relaxed(addr);
+
+	val &= ~clr;
+	writel_relaxed(val, addr);
+}
+
+/*
+ * Translate a bus/device/function number plus register offset into the
+ * address inside the mapped configuration space and store it in *paddress.
+ * Always returns 0.
+ */
+static int apple_pcie_config_address(const struct udevice *bus,
+				     pci_dev_t bdf, uint offset,
+				     void **paddress)
+{
+	struct apple_pcie_priv *pcie = dev_get_priv(bus);
+	void *cfg = pcie->cfg_base;
+
+	*paddress = cfg + PCIE_ECAM_OFFSET(PCI_BUS(bdf), PCI_DEV(bdf),
+					   PCI_FUNC(bdf), offset);
+
+	return 0;
+}
+
+/*
+ * Configuration space read: hand the access to the generic memory-mapped
+ * helper, using apple_pcie_config_address() for address translation.
+ */
+static int apple_pcie_read_config(const struct udevice *bus, pci_dev_t bdf,
+				  uint offset, ulong *valuep,
+				  enum pci_size_t size)
+{
+	return pci_generic_mmap_read_config(bus, apple_pcie_config_address,
+					    bdf, offset, valuep, size);
+}
+
+/*
+ * Configuration space write: hand the access to the generic memory-mapped
+ * helper, using apple_pcie_config_address() for address translation.
+ */
+static int apple_pcie_write_config(struct udevice *bus, pci_dev_t bdf,
+				   uint offset, ulong value,
+				   enum pci_size_t size)
+{
+	int ret;
+
+	ret = pci_generic_mmap_write_config(bus, apple_pcie_config_address,
+					    bdf, offset, value, size);
+
+	return ret;
+}
+
+/* PCI operations of this driver: configuration space read and write */
+static const struct dm_pci_ops apple_pcie_ops = {
+	.read_config = apple_pcie_read_config,
+	.write_config = apple_pcie_write_config,
+};
+
+/*
+ * Enable the reference clock for @port.
+ *
+ * Waits for CORE_RC_PHYIF_STAT_REFCLK, sets CORE_LANE_CTL_CFGACC, requests
+ * REFCLK0 and then REFCLK1 on the port's lane (waiting for each
+ * acknowledge bit), clears CORE_LANE_CTL_CFGACC again and finally sets the
+ * lane and port REFCLK enable bits.
+ *
+ * Returns 0 on success or the negative result of the poll that failed.
+ */
+static int apple_pcie_setup_refclk(struct apple_pcie_priv *pcie,
+				   struct apple_pcie_port *port)
+{
+	void __iomem *lane_cfg = pcie->base + CORE_LANE_CFG(port->idx);
+	void __iomem *lane_ctl = pcie->base + CORE_LANE_CTL(port->idx);
+	u32 val;
+	int err;
+
+	err = readl_poll_sleep_timeout(pcie->base + CORE_RC_PHYIF_STAT, val,
+				       val & CORE_RC_PHYIF_STAT_REFCLK,
+				       100, 50000);
+	if (err < 0)
+		return err;
+
+	rmw_set(CORE_LANE_CTL_CFGACC, lane_ctl);
+
+	rmw_set(CORE_LANE_CFG_REFCLK0REQ, lane_cfg);
+	err = readl_poll_sleep_timeout(lane_cfg, val,
+				       val & CORE_LANE_CFG_REFCLK0ACK,
+				       100, 50000);
+	if (err < 0)
+		return err;
+
+	rmw_set(CORE_LANE_CFG_REFCLK1REQ, lane_cfg);
+	err = readl_poll_sleep_timeout(lane_cfg, val,
+				       val & CORE_LANE_CFG_REFCLK1ACK,
+				       100, 50000);
+	if (err < 0)
+		return err;
+
+	rmw_clear(CORE_LANE_CTL_CFGACC, lane_ctl);
+
+	rmw_set(CORE_LANE_CFG_REFCLKEN, lane_cfg);
+	rmw_set(PORT_REFCLK_EN, port->base + PORT_REFCLK);
+
+	return 0;
+}
+
+/**
+ * apple_pcie_setup_port() - bring up one root port
+ *
+ * Requests the port's reset GPIO, enables the port clocks, asserts and
+ * deasserts PERST#, waits for PORT_STATUS_READY, adds the port to the
+ * controller's port list and starts link training.  A link that does not
+ * come up is not treated as an error.
+ *
+ * @pcie:	controller the port belongs to
+ * @np:		device tree node describing the port
+ * Return: 0 on success, otherwise the error of the step that failed.  On
+ *	   failure the reset GPIO and the port allocation are released again.
+ */
+static int apple_pcie_setup_port(struct apple_pcie_priv *pcie, ofnode np)
+{
+	struct apple_pcie_port *port;
+	struct gpio_desc reset;
+	fdt_addr_t addr;
+	u32 stat, idx;
+	int ret;
+
+	ret = gpio_request_by_name_nodev(np, "reset-gpios", 0, &reset, 0);
+	if (ret)
+		return ret;
+
+	port = devm_kzalloc(pcie->dev, sizeof(*port), GFP_KERNEL);
+	if (!port) {
+		ret = -ENOMEM;
+		goto err_free_gpio;
+	}
+
+	ret = ofnode_read_u32_index(np, "reg", 0, &idx);
+	if (ret)
+		goto err_free_port;
+
+	/* Use the first reg entry to work out the port index */
+	port->idx = idx >> 11;
+	port->pcie = pcie;
+	port->reset = reset;
+	port->np = np;
+
+	addr = dev_read_addr_index(pcie->dev, port->idx + 2);
+	if (addr == FDT_ADDR_T_NONE) {
+		ret = -EINVAL;
+		goto err_free_port;
+	}
+	port->base = map_sysmem(addr, 0);
+
+	rmw_set(PORT_APPCLK_EN, port->base + PORT_APPCLK);
+
+	/* Assert PERST# before setting up the clock */
+	dm_gpio_set_value(&reset, 1);
+
+	ret = apple_pcie_setup_refclk(pcie, port);
+	if (ret < 0)
+		goto err_free_port;
+
+	/* The minimal Tperst-clk value is 100us (PCIe CEM r5.0, 2.9.2) */
+	udelay(100);
+
+	/* Deassert PERST# */
+	rmw_set(PORT_PERST_OFF, port->base + PORT_PERST);
+	dm_gpio_set_value(&reset, 0);
+
+	/* Wait for 100ms after PERST# deassertion (PCIe r5.0, 6.6.1) */
+	udelay(100 * 1000);
+
+	ret = readl_poll_sleep_timeout(port->base + PORT_STATUS, stat,
+				       stat & PORT_STATUS_READY, 100, 250000);
+	if (ret < 0) {
+		dev_err(pcie->dev, "port %d ready wait timeout\n", port->idx);
+		goto err_free_port;
+	}
+
+	rmw_clear(PORT_REFCLK_CGDIS, port->base + PORT_REFCLK);
+	rmw_clear(PORT_APPCLK_CGDIS, port->base + PORT_APPCLK);
+
+	list_add_tail(&port->entry, &pcie->ports);
+
+	writel_relaxed(PORT_LTSSMCTL_START, port->base + PORT_LTSSMCTL);
+
+	/*
+	 * Deliberately ignore the link not coming up as connected
+	 * devices (e.g. the WiFi controller) may not be powered up.
+	 */
+	readl_poll_sleep_timeout(port->base + PORT_LINKSTS, stat,
+				 (stat & PORT_LINKSTS_UP), 100, 100000);
+
+	return 0;
+
+	/*
+	 * All failures happen before the port is added to pcie->ports, so
+	 * nothing else refers to it and it can simply be released.
+	 */
+err_free_port:
+	devm_kfree(pcie->dev, port);
+err_free_gpio:
+	/* Do not leave the reset GPIO claimed for a port that is not set up */
+	gpio_free_list_nodev(&reset, 1);
+	return ret;
+}
+
+/**
+ * apple_pcie_probe() - map the controller registers and set up its ports
+ *
+ * The device's first "reg" entry is mapped as configuration space and the
+ * second as the core register block.  Every subnode of the device node is
+ * then passed to apple_pcie_setup_port().
+ *
+ * @dev:	PCIe host bridge device
+ * Return: 0 on success, -EINVAL if one of the two register regions is
+ *	   missing, or the error of the first port that fails to set up
+ */
+static int apple_pcie_probe(struct udevice *dev)
+{
+	struct apple_pcie_priv *pcie = dev_get_priv(dev);
+	fdt_addr_t addr;
+	ofnode of_port;
+	int i = 0, ret;
+
+	pcie->dev = dev;
+	addr = dev_read_addr_index(dev, 0);
+	if (addr == FDT_ADDR_T_NONE)
+		return -EINVAL;
+	pcie->cfg_base = map_sysmem(addr, 0);
+
+	addr = dev_read_addr_index(dev, 1);
+	if (addr == FDT_ADDR_T_NONE)
+		return -EINVAL;
+	pcie->base = map_sysmem(addr, 0);
+
+	INIT_LIST_HEAD(&pcie->ports);
+
+	/*
+	 * i counts the subnodes in the order they are visited; it is only
+	 * used to identify the failing port in the error message.
+	 */
+	for (of_port = ofnode_first_subnode(dev_ofnode(dev));
+	     ofnode_valid(of_port);
+	     of_port = ofnode_next_subnode(of_port), i++) {
+		ret = apple_pcie_setup_port(pcie, of_port);
+		if (ret) {
+			dev_err(pcie->dev, "Port %d setup fail: %d\n", i, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * apple_pcie_remove() - release the per-port resources of the controller
+ *
+ * For every port on the controller's list the reset GPIO is freed, the
+ * port is unlinked from the list and its memory is released.
+ *
+ * @dev:	PCIe host bridge device
+ * Return: always 0
+ */
+static int apple_pcie_remove(struct udevice *dev)
+{
+	struct apple_pcie_priv *pcie = dev_get_priv(dev);
+	struct apple_pcie_port *port, *tmp;
+
+	list_for_each_entry_safe(port, tmp, &pcie->ports, entry) {
+		gpio_free_list_nodev(&port->reset, 1);
+		/* Unlink first so the list never refers to released memory */
+		list_del(&port->entry);
+		/*
+		 * The port was allocated with devm_kzalloc(), so it has to
+		 * be released with devm_kfree() rather than plain free().
+		 */
+		devm_kfree(pcie->dev, port);
+	}
+
+	return 0;
+}
+
+/* Device tree "compatible" strings this driver matches */
+static const struct udevice_id apple_pcie_of_match[] = {
+	{ .compatible = "apple,pcie" },
+	{ /* sentinel */ }
+};
+
+/*
+ * Driver declaration: a UCLASS_PCI driver whose private data is a
+ * struct apple_pcie_priv, using the probe/remove callbacks and config
+ * space operations defined in this file.
+ */
+U_BOOT_DRIVER(apple_pcie) = {
+	.name = "apple_pcie",
+	.id = UCLASS_PCI,
+	.of_match = apple_pcie_of_match,
+	.probe = apple_pcie_probe,
+	.remove = apple_pcie_remove,
+	.priv_auto = sizeof(struct apple_pcie_priv),
+	.ops = &apple_pcie_ops,
+};
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 6/7] arm: apple: Enable PCIe USB controller
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
                   ` (4 preceding siblings ...)
  2023-01-17 22:04 ` [PATCH 5/7] pci: Add Apple PCIe controller driver Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
  2023-01-17 22:04 ` [PATCH 7/7] usb: xhci: Fix root hub descriptor Mark Kettenis
  6 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

Some Apple Silicon machines have a PCIe XHCI controller in addition
to the DWC3 controllers integrated on the SoC.  On the Mac mini
the Type-A ports are handled by this PCIe controller.  Enabling
it allows the use of these ports in U-Boot.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 configs/apple_m1_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configs/apple_m1_defconfig b/configs/apple_m1_defconfig
index 4ba09334d4..b4ecf73cbc 100644
--- a/configs/apple_m1_defconfig
+++ b/configs/apple_m1_defconfig
@@ -15,6 +15,7 @@ CONFIG_APPLE_SPI_KEYB=y
 CONFIG_NVME_APPLE=y
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_XHCI_DWC3=y
+CONFIG_USB_XHCI_PCI=y
 CONFIG_USB_KEYBOARD=y
 CONFIG_SYS_WHITE_ON_BLACK=y
 CONFIG_NO_FB_CLEAR=y
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 7/7] usb: xhci: Fix root hub descriptor
  2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
                   ` (5 preceding siblings ...)
  2023-01-17 22:04 ` [PATCH 6/7] arm: apple: Enable PCIe USB controller Mark Kettenis
@ 2023-01-17 22:04 ` Mark Kettenis
  2023-01-17 22:51   ` Marek Vasut
  6 siblings, 1 reply; 12+ messages in thread
From: Mark Kettenis @ 2023-01-17 22:04 UTC (permalink / raw)
  To: u-boot; +Cc: sjg, bmeng.cn, marex, Mark Kettenis

When a system has multiple XHCI controllers, some of the
properties described in the descriptor of the root hub (such as
the number of ports) might differ between controllers.  Fix this
by switching from a single global hub descriptor to a hub
descriptor per controller.

Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
---
 drivers/usb/host/xhci.c | 15 ++++++++-------
 include/usb/xhci.h      |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 440b0224b1..9e33c5d855 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -944,7 +944,7 @@ static int xhci_submit_root(struct usb_device *udev, unsigned long pipe,
 		case USB_DT_HUB:
 		case USB_DT_SS_HUB:
 			debug("USB_DT_HUB config\n");
-			srcptr = &descriptor.hub;
+			srcptr = &ctrl->hub_desc;
 			srclen = 0x8;
 			break;
 		default:
@@ -1203,21 +1203,22 @@ static int xhci_lowlevel_init(struct xhci_ctrl *ctrl)
 	/* initializing xhci data structures */
 	if (xhci_mem_init(ctrl, hccr, hcor) < 0)
 		return -ENOMEM;
+	ctrl->hub_desc = descriptor.hub;
 
 	reg = xhci_readl(&hccr->cr_hcsparams1);
-	descriptor.hub.bNbrPorts = HCS_MAX_PORTS(reg);
-	printf("Register %x NbrPorts %d\n", reg, descriptor.hub.bNbrPorts);
+	ctrl->hub_desc.bNbrPorts = HCS_MAX_PORTS(reg);
+	printf("Register %x NbrPorts %d\n", reg, ctrl->hub_desc.bNbrPorts);
 
 	/* Port Indicators */
 	reg = xhci_readl(&hccr->cr_hccparams);
 	if (HCS_INDICATOR(reg))
-		put_unaligned(get_unaligned(&descriptor.hub.wHubCharacteristics)
-				| 0x80, &descriptor.hub.wHubCharacteristics);
+		put_unaligned(get_unaligned(&ctrl->hub_desc.wHubCharacteristics)
+				| 0x80, &ctrl->hub_desc.wHubCharacteristics);
 
 	/* Port Power Control */
 	if (HCC_PPC(reg))
-		put_unaligned(get_unaligned(&descriptor.hub.wHubCharacteristics)
-				| 0x01, &descriptor.hub.wHubCharacteristics);
+		put_unaligned(get_unaligned(&ctrl->hub_desc.wHubCharacteristics)
+				| 0x01, &ctrl->hub_desc.wHubCharacteristics);
 
 	if (xhci_start(hcor)) {
 		xhci_reset(hcor);
diff --git a/include/usb/xhci.h b/include/usb/xhci.h
index 85c359fa1b..4a4ac10229 100644
--- a/include/usb/xhci.h
+++ b/include/usb/xhci.h
@@ -1222,6 +1222,7 @@ struct xhci_ctrl {
 	struct xhci_erst_entry entry[ERST_NUM_SEGS];
 	struct xhci_scratchpad *scratchpad;
 	struct xhci_virt_device *devs[MAX_HC_SLOTS];
+	struct usb_hub_descriptor hub_desc;
 	int rootdev;
 	u16 hci_version;
 	int page_size;
-- 
2.39.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 7/7] usb: xhci: Fix root hub descriptor
  2023-01-17 22:04 ` [PATCH 7/7] usb: xhci: Fix root hub descriptor Mark Kettenis
@ 2023-01-17 22:51   ` Marek Vasut
  0 siblings, 0 replies; 12+ messages in thread
From: Marek Vasut @ 2023-01-17 22:51 UTC (permalink / raw)
  To: Mark Kettenis, u-boot; +Cc: sjg, bmeng.cn

On 1/17/23 23:04, Mark Kettenis wrote:
> When a system has multiple XHCI controllers, some of the
> properties described in the descriptor of the root hub (such as
> the number of ports) might differ between controllers.  Fix this
> by switching from a single global hub descriptor to a hub
> descriptor per controller.
> 
> Signed-off-by: Mark Kettenis <kettenis@openbsd.org>

Reviewed-by: Marek Vasut <marex@denx.de>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/7] usb: xhci: Implement DMA mapping
  2023-01-17 22:04 ` [PATCH 3/7] usb: xhci: Implement DMA mapping Mark Kettenis
@ 2023-01-17 22:51   ` Marek Vasut
  0 siblings, 0 replies; 12+ messages in thread
From: Marek Vasut @ 2023-01-17 22:51 UTC (permalink / raw)
  To: Mark Kettenis, u-boot; +Cc: sjg, bmeng.cn

On 1/17/23 23:04, Mark Kettenis wrote:
> An XHCI controller that sits behind an IOMMU needs to map and unmap
> its memory buffers to do DMA.  Implement this by introducing new
> xhci_dma_map() and xhci_dma_unmap() helper functions.  The
> xhci_dma_map() function replaces the existing xhci_virt_to_bus()
> function in the sense that it returns the bus address in the case
> of simple address translation in the absence of an IOMMU.  The
> xhci_bus_to_virt() function is eliminated by storing the CPU
> address of the allocated scratchpad memory in struct xhci_ctrl.

Reviewed-by: Marek Vasut <marex@denx.de>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/7] iommu: Add DMA mapping operations
  2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
@ 2023-01-18 19:42   ` Simon Glass
  2023-01-21 19:28     ` Mark Kettenis
  0 siblings, 1 reply; 12+ messages in thread
From: Simon Glass @ 2023-01-18 19:42 UTC (permalink / raw)
  To: Mark Kettenis; +Cc: u-boot, bmeng.cn, marex

Hi Mark,

On Tue, 17 Jan 2023 at 15:04, Mark Kettenis <kettenis@openbsd.org> wrote:
>
> In order to support IOMMUs in non-bypass mode we need device ops
> to map and unmap DMA memory.  The map operation enters a mapping
> for a region specified by CPU address and size into the translation
> table of the IOMMU and returns a DMA address suitable for
> programming the device to do DMA.  The unmap operation removes
> this mapping from the translation table of the IOMMU.
>
> Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
> ---
>  drivers/iommu/iommu-uclass.c | 28 ++++++++++++++++++++++++++++
>  include/dm/device.h          |  3 +++
>  include/iommu.h              | 24 ++++++++++++++++++++++++
>  3 files changed, 55 insertions(+)
>
> diff --git a/drivers/iommu/iommu-uclass.c b/drivers/iommu/iommu-uclass.c
> index ed917b3c3e..f6b1457736 100644
> --- a/drivers/iommu/iommu-uclass.c
> +++ b/drivers/iommu/iommu-uclass.c
> @@ -7,6 +7,9 @@
>
>  #include <common.h>
>  #include <dm.h>
> +#include <iommu.h>
> +#include <phys2bus.h>
> +#include <asm/io.h>
>
>  #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA))
>  int dev_iommu_enable(struct udevice *dev)
> @@ -33,12 +36,37 @@ int dev_iommu_enable(struct udevice *dev)
>                               __func__, ret);
>                         return ret;
>                 }
> +               dev->iommu = dev_iommu;
>         }
>
>         return 0;
>  }
>  #endif
>
> +dma_addr_t dev_iommu_dma_map(struct udevice *dev, void *addr, size_t size)
> +{
> +       const struct iommu_ops *ops;
> +
> +       if (dev->iommu) {
> +               ops = device_get_ops(dev->iommu);
> +               if (ops && ops->map)
> +                       return ops->map(dev->iommu, addr, size);
> +       }
> +
> +       return dev_phys_to_bus(dev, virt_to_phys(addr));
> +}
> +
> +void dev_iommu_dma_unmap(struct udevice *dev, dma_addr_t addr, size_t size)
> +{
> +       const struct iommu_ops *ops;
> +
> +       if (dev->iommu) {
> +               ops = device_get_ops(dev->iommu);
> +               if (ops && ops->unmap)
> +                       ops->unmap(dev->iommu, addr, size);
> +       }
> +}
> +
>  UCLASS_DRIVER(iommu) = {
>         .id             = UCLASS_IOMMU,
>         .name           = "iommu",
> diff --git a/include/dm/device.h b/include/dm/device.h
> index f3f953c9af..abe1927ecd 100644
> --- a/include/dm/device.h
> +++ b/include/dm/device.h
> @@ -194,6 +194,9 @@ struct udevice {
>  #if CONFIG_IS_ENABLED(DM_DMA)
>         ulong dma_offset;
>  #endif
> +#if CONFIG_IS_ENABLED(IOMMU)
> +       struct udevice *iommu;
> +#endif
>  };
>
>  static inline int dm_udevice_size(void)
> diff --git a/include/iommu.h b/include/iommu.h
> index 6c46adf449..cf9719c5e9 100644
> --- a/include/iommu.h
> +++ b/include/iommu.h
> @@ -3,6 +3,27 @@
>
>  struct udevice;
>
> +struct iommu_ops {
> +       /**
> +        * map() - map DMA memory
> +        *
> +        * @dev:        device for which to map DMA memory
> +        * @addr:       CPU address of the memory
> +        * @size:       size of the memory
> +        * @return DMA address for the device
> +        */
> +       dma_addr_t (*map)(struct udevice *dev, void *addr, size_t size);
> +
> +       /**
> +        * unmap() - unmap DMA memory
> +        *
> +        * @dev:        device for which to unmap DMA memory
> +        * @addr:       DMA address of the memory
> +        * @size:       size of the memory
> +        */
> +       void (*unmap)(struct udevice *dev, dma_addr_t addr, size_t size);
> +};
> +
>  #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA)) && \
>         CONFIG_IS_ENABLED(IOMMU)
>  int dev_iommu_enable(struct udevice *dev);
> @@ -13,4 +34,7 @@ static inline int dev_iommu_enable(struct udevice *dev)
>  }
>  #endif
>
> +dma_addr_t dev_iommu_dma_map(struct udevice *dev, void *addr, size_t size);
> +void dev_iommu_dma_unmap(struct udevice *dev, dma_addr_t addr, size_t size);
> +
>  #endif

Please add some tests for these operations to test/dm/iommu.c

Regards,
Simon

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/7] iommu: Add DMA mapping operations
  2023-01-18 19:42   ` Simon Glass
@ 2023-01-21 19:28     ` Mark Kettenis
  0 siblings, 0 replies; 12+ messages in thread
From: Mark Kettenis @ 2023-01-21 19:28 UTC (permalink / raw)
  To: Simon Glass; +Cc: kettenis, u-boot, bmeng.cn, marex

> From: Simon Glass <sjg@chromium.org>
> Date: Wed, 18 Jan 2023 12:42:19 -0700
> 
> Hi Mark,
> 
> On Tue, 17 Jan 2023 at 15:04, Mark Kettenis <kettenis@openbsd.org> wrote:
> >
> > In order to support IOMMUs in non-bypass mode we need device ops
> > to map and unmap DMA memory.  The map operation enters a mapping
> > for a region specified by CPU address and size into the translation
> > table of the IOMMU and returns a DMA address suitable for
> > programming the device to do DMA.  The unmap operation removes
> > this mapping from the translation table of the IOMMU.
> >
> > Signed-off-by: Mark Kettenis <kettenis@openbsd.org>
> > ---
> >  drivers/iommu/iommu-uclass.c | 28 ++++++++++++++++++++++++++++
> >  include/dm/device.h          |  3 +++
> >  include/iommu.h              | 24 ++++++++++++++++++++++++
> >  3 files changed, 55 insertions(+)
> >
> > diff --git a/drivers/iommu/iommu-uclass.c b/drivers/iommu/iommu-uclass.c
> > index ed917b3c3e..f6b1457736 100644
> > --- a/drivers/iommu/iommu-uclass.c
> > +++ b/drivers/iommu/iommu-uclass.c
> > @@ -7,6 +7,9 @@
> >
> >  #include <common.h>
> >  #include <dm.h>
> > +#include <iommu.h>
> > +#include <phys2bus.h>
> > +#include <asm/io.h>
> >
> >  #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA))
> >  int dev_iommu_enable(struct udevice *dev)
> > @@ -33,12 +36,37 @@ int dev_iommu_enable(struct udevice *dev)
> >                               __func__, ret);
> >                         return ret;
> >                 }
> > +               dev->iommu = dev_iommu;
> >         }
> >
> >         return 0;
> >  }
> >  #endif
> >
> > +dma_addr_t dev_iommu_dma_map(struct udevice *dev, void *addr, size_t size)
> > +{
> > +       const struct iommu_ops *ops;
> > +
> > +       if (dev->iommu) {
> > +               ops = device_get_ops(dev->iommu);
> > +               if (ops && ops->map)
> > +                       return ops->map(dev->iommu, addr, size);
> > +       }
> > +
> > +       return dev_phys_to_bus(dev, virt_to_phys(addr));
> > +}
> > +
> > +void dev_iommu_dma_unmap(struct udevice *dev, dma_addr_t addr, size_t size)
> > +{
> > +       const struct iommu_ops *ops;
> > +
> > +       if (dev->iommu) {
> > +               ops = device_get_ops(dev->iommu);
> > +               if (ops && ops->unmap)
> > +                       ops->unmap(dev->iommu, addr, size);
> > +       }
> > +}
> > +
> >  UCLASS_DRIVER(iommu) = {
> >         .id             = UCLASS_IOMMU,
> >         .name           = "iommu",
> > diff --git a/include/dm/device.h b/include/dm/device.h
> > index f3f953c9af..abe1927ecd 100644
> > --- a/include/dm/device.h
> > +++ b/include/dm/device.h
> > @@ -194,6 +194,9 @@ struct udevice {
> >  #if CONFIG_IS_ENABLED(DM_DMA)
> >         ulong dma_offset;
> >  #endif
> > +#if CONFIG_IS_ENABLED(IOMMU)
> > +       struct udevice *iommu;
> > +#endif
> >  };
> >
> >  static inline int dm_udevice_size(void)
> > diff --git a/include/iommu.h b/include/iommu.h
> > index 6c46adf449..cf9719c5e9 100644
> > --- a/include/iommu.h
> > +++ b/include/iommu.h
> > @@ -3,6 +3,27 @@
> >
> >  struct udevice;
> >
> > +struct iommu_ops {
> > +       /**
> > +        * map() - map DMA memory
> > +        *
> > +        * @dev:        device for which to map DMA memory
> > +        * @addr:       CPU address of the memory
> > +        * @size:       size of the memory
> > +        * @return DMA address for the device
> > +        */
> > +       dma_addr_t (*map)(struct udevice *dev, void *addr, size_t size);
> > +
> > +       /**
> > +        * unmap() - unmap DMA memory
> > +        *
> > +        * @dev:        device for which to unmap DMA memory
> > +        * @addr:       DMA address of the memory
> > +        * @size:       size of the memory
> > +        */
> > +       void (*unmap)(struct udevice *dev, dma_addr_t addr, size_t size);
> > +};
> > +
> >  #if (CONFIG_IS_ENABLED(OF_CONTROL) && !CONFIG_IS_ENABLED(OF_PLATDATA)) && \
> >         CONFIG_IS_ENABLED(IOMMU)
> >  int dev_iommu_enable(struct udevice *dev);
> > @@ -13,4 +34,7 @@ static inline int dev_iommu_enable(struct udevice *dev)
> >  }
> >  #endif
> >
> > +dma_addr_t dev_iommu_dma_map(struct udevice *dev, void *addr, size_t size);
> > +void dev_iommu_dma_unmap(struct udevice *dev, dma_addr_t addr, size_t size);
> > +
> >  #endif
> 
> Please add some tests for these operations to test/dm/iommu.c

Sure; just sent v2 with some tests added.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2023-01-21 19:30 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-01-17 22:03 [PATCH 0/7] Apple PCIe/XHCI support Mark Kettenis
2023-01-17 22:03 ` [PATCH 1/7] iommu: Add DMA mapping operations Mark Kettenis
2023-01-18 19:42   ` Simon Glass
2023-01-21 19:28     ` Mark Kettenis
2023-01-17 22:03 ` [PATCH 2/7] iommu: apple: Implement DMA mapping operations for Apple DART Mark Kettenis
2023-01-17 22:04 ` [PATCH 3/7] usb: xhci: Implement DMA mapping Mark Kettenis
2023-01-17 22:51   ` Marek Vasut
2023-01-17 22:04 ` [PATCH 4/7] iommu: Implement mapping IOMMUs for PCI devices Mark Kettenis
2023-01-17 22:04 ` [PATCH 5/7] pci: Add Apple PCIe controller driver Mark Kettenis
2023-01-17 22:04 ` [PATCH 6/7] arm: apple: Enable PCIe USB controller Mark Kettenis
2023-01-17 22:04 ` [PATCH 7/7] usb: xhci: Fix root hub descriptor Mark Kettenis
2023-01-17 22:51   ` Marek Vasut

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.