All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Roese <sr@denx.de>
To: u-boot@lists.denx.de
Subject: [PATCH v1 18/24] mmc: Add MMC controller driver for OcteonTX / TX2
Date: Fri, 24 Jul 2020 12:08:50 +0200	[thread overview]
Message-ID: <20200724100856.1482324-19-sr@denx.de> (raw)
In-Reply-To: <20200724100856.1482324-1-sr@denx.de>

From: Suneel Garapati <sgarapati@marvell.com>

Adds support for MMC controllers found on OcteonTX or
OcteonTX2 SoC platforms.

Signed-off-by: Aaron Williams <awilliams@marvell.com>
Signed-off-by: Suneel Garapati <sgarapati@marvell.com>
Cc: Peng Fan <peng.fan@nxp.com>

Signed-off-by: Stefan Roese <sr@denx.de>
---

Changes in v1:
- Change patch subject
- Rebased on latest TOT
- Removed inclusion of common.h
- Fix most checkpatch errors / warnings (use IS_ENABLED etc)

 drivers/mmc/Kconfig          |    9 +
 drivers/mmc/Makefile         |    1 +
 drivers/mmc/octeontx_hsmmc.c | 3904 ++++++++++++++++++++++++++++++++++
 drivers/mmc/octeontx_hsmmc.h |  207 ++
 4 files changed, 4121 insertions(+)
 create mode 100644 drivers/mmc/octeontx_hsmmc.c
 create mode 100644 drivers/mmc/octeontx_hsmmc.h

diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index ad86c232c4..ce68e68680 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -297,6 +297,15 @@ config MMC_PCI
 	  This selects PCI-based MMC controllers.
 	  If you have an MMC controller on a PCI bus, say Y here.
 
+config MMC_OCTEONTX
+	bool "Marvell OcteonTX Multimedia Card Interface support"
+	depends on (ARCH_OCTEONTX || ARCH_OCTEONTX2)
+	depends on DM_MMC
+	help
+	  This selects the OcteonTX Multimedia card Interface.
+	  If you have an OcteonTX/TX2 board with a Multimedia Card slot,
+	  say Y here.
+
 	  If unsure, say N.
 
 config PXA_MMC_GENERIC
diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index e84c792999..e71297f725 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_MVEBU_MMC) += mvebu_mmc.o
 obj-$(CONFIG_MMC_OMAP_HS)		+= omap_hsmmc.o
 obj-$(CONFIG_MMC_MXC)			+= mxcmmc.o
 obj-$(CONFIG_MMC_MXS)			+= mxsmmc.o
+obj-$(CONFIG_MMC_OCTEONTX)		+= octeontx_hsmmc.o
 obj-$(CONFIG_MMC_PCI)			+= pci_mmc.o
 obj-$(CONFIG_PXA_MMC_GENERIC) += pxa_mmc_gen.o
 obj-$(CONFIG_$(SPL_TPL_)SUPPORT_EMMC_RPMB) += rpmb.o
diff --git a/drivers/mmc/octeontx_hsmmc.c b/drivers/mmc/octeontx_hsmmc.c
new file mode 100644
index 0000000000..f8872b9739
--- /dev/null
+++ b/drivers/mmc/octeontx_hsmmc.c
@@ -0,0 +1,3904 @@
+// SPDX-License-Identifier:    GPL-2.0
+/*
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * https://spdx.org/licenses
+ */
+
+#include <cpu_func.h>
+#include <dm.h>
+#include <dm/lists.h>
+#include <env.h>
+#include <errno.h>
+#include <fdtdec.h>
+#include <log.h>
+#include <malloc.h>
+#include <memalign.h>
+#include <mmc.h>
+#include <part.h>
+#include <pci.h>
+#include <pci_ids.h>
+#include <time.h>
+#include <watchdog.h>
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/csrs/csrs-mio_emm.h>
+#include <asm/io.h>
+
+#include <power/regulator.h>
+
+#include "octeontx_hsmmc.h"
+
+#define MMC_TIMEOUT_SHORT	20	/* in ms */
+#define MMC_TIMEOUT_LONG	1000
+#define MMC_TIMEOUT_ERASE	10000
+
+#define MMC_DEFAULT_DATA_IN_TAP			10
+#define MMC_DEFAULT_CMD_IN_TAP			10
+#define MMC_DEFAULT_CMD_OUT_TAP			39
+#define MMC_DEFAULT_DATA_OUT_TAP		39
+#define MMC_DEFAULT_HS200_CMD_IN_TAP		24
+#define MMC_DEFAULT_HS200_DATA_IN_TAP		24
+#define MMC_DEFAULT_HS200_CMD_OUT_TAP	(otx_is_soc(CN95XX) ? 10 : 5)
+#define MMC_DEFAULT_HS200_DATA_OUT_TAP	(otx_is_soc(CN95XX) ? 10 : 5)
+#define MMC_DEFAULT_HS400_CMD_OUT_TAP	(otx_is_soc(CN95XX) ? 10 : 5)
+#define MMC_DEFAULT_HS400_DATA_OUT_TAP	(otx_is_soc(CN95XX) ? 5 : 3)
+#define MMC_DEFAULT_HS200_CMD_OUT_DLY		800	/* Delay in ps */
+#define MMC_DEFAULT_HS200_DATA_OUT_DLY		800	/* Delay in ps */
+#define MMC_DEFAULT_HS400_CMD_OUT_DLY		800	/* Delay in ps */
+#define MMC_DEFAULT_HS400_DATA_OUT_DLY		400	/* Delay in ps */
+#define MMC_DEFAULT_SD_UHS_SDR104_CMD_OUT_TAP	MMC_DEFAULT_HS200_CMD_OUT_TAP
+#define MMC_DEFAULT_SD_UHS_SDR104_DATA_OUT_TAP	MMC_DEFAULT_HS200_DATA_OUT_TAP
+#define MMC_LEGACY_DEFAULT_CMD_OUT_TAP		39
+#define MMC_LEGACY_DEFAULT_DATA_OUT_TAP		39
+#define MMC_SD_LEGACY_DEFAULT_CMD_OUT_TAP	63
+#define MMC_SD_LEGACY_DEFAULT_DATA_OUT_TAP	63
+#define MMC_HS_CMD_OUT_TAP			32
+#define MMC_HS_DATA_OUT_TAP			32
+#define MMC_SD_HS_CMD_OUT_TAP			26
+#define MMC_SD_HS_DATA_OUT_TAP			26
+#define MMC_SD_UHS_SDR25_CMD_OUT_TAP		26
+#define MMC_SD_UHS_SDR25_DATA_OUT_TAP		26
+#define MMC_SD_UHS_SDR50_CMD_OUT_TAP		26
+#define MMC_SD_UHS_SDR50_DATA_OUT_TAP		26
+#define MMC_DEFAULT_TAP_DELAY			4
+#define TOTAL_NO_OF_TAPS			512
+static void octeontx_mmc_switch_to(struct mmc *mmc);
+static int octeontx_mmc_configure_delay(struct mmc *mmc);
+static void octeontx_mmc_set_timing(struct mmc *mmc);
+static void set_wdog(struct mmc *mmc, u64 us);
+static void do_switch(struct mmc *mmc, union mio_emm_switch emm_switch);
+static int octeontx_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
+				 struct mmc_data *data);
+static int octeontx2_mmc_calc_delay(struct mmc *mmc, int delay);
+static int octeontx_mmc_calibrate_delay(struct mmc *mmc);
+static int octeontx_mmc_set_input_bus_timing(struct mmc *mmc);
+static int octeontx_mmc_set_output_bus_timing(struct mmc *mmc);
+
+static bool host_probed;
+
+/**
+ * Get the slot data structure from a MMC data structure
+ */
+static inline struct octeontx_mmc_slot *mmc_to_slot(struct mmc *mmc)
+{
+	return container_of(mmc, struct octeontx_mmc_slot, mmc);
+}
+
+static inline struct octeontx_mmc_host *mmc_to_host(struct mmc *mmc)
+{
+	return mmc_to_slot(mmc)->host;
+}
+
+static inline struct octeontx_mmc_slot *dev_to_mmc_slot(struct udevice *dev)
+{
+	return dev_get_priv(dev);
+}
+
+static inline struct mmc *dev_to_mmc(struct udevice *dev)
+{
+	return &((struct octeontx_mmc_slot *)dev_get_priv(dev))->mmc;
+}
+
+#ifdef DEBUG
+const char *mmc_reg_str(u64 reg)
+{
+	if (reg == MIO_EMM_DMA_CFG())
+		return "MIO_EMM_DMA_CFG";
+	if (reg == MIO_EMM_DMA_ADR())
+		return "MIO_EMM_DMA_ADR";
+	if (reg == MIO_EMM_DMA_INT())
+		return "MIO_EMM_DMA_INT";
+	if (reg == MIO_EMM_CFG())
+		return "MIO_EMM_CFG";
+	if (reg == MIO_EMM_MODEX(0))
+		return "MIO_EMM_MODE0";
+	if (reg == MIO_EMM_MODEX(1))
+		return "MIO_EMM_MODE1";
+	if (reg == MIO_EMM_MODEX(2))
+		return "MIO_EMM_MODE2";
+	if (reg == MIO_EMM_MODEX(3))
+		return "MIO_EMM_MODE3";
+	if (reg == MIO_EMM_IO_CTL())
+		return "MIO_EMM_IO_CTL";
+	if (reg == MIO_EMM_SWITCH())
+		return "MIO_EMM_SWITCH";
+	if (reg == MIO_EMM_DMA())
+		return "MIO_EMM_DMA";
+	if (reg == MIO_EMM_CMD())
+		return "MIO_EMM_CMD";
+	if (reg == MIO_EMM_RSP_STS())
+		return "MIO_EMM_RSP_STS";
+	if (reg == MIO_EMM_RSP_LO())
+		return "MIO_EMM_RSP_LO";
+	if (reg == MIO_EMM_RSP_HI())
+		return "MIO_EMM_RSP_HI";
+	if (reg == MIO_EMM_INT())
+		return "MIO_EMM_INT";
+	if (reg == MIO_EMM_WDOG())
+		return "MIO_EMM_WDOG";
+	if (reg == MIO_EMM_DMA_ARG())
+		return "MIO_EMM_DMA_ARG";
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		if (reg == MIO_EMM_SAMPLE())
+			return "MIO_EMM_SAMPLE";
+	}
+	if (reg == MIO_EMM_STS_MASK())
+		return "MIO_EMM_STS_MASK";
+	if (reg == MIO_EMM_RCA())
+		return "MIO_EMM_RCA";
+	if (reg == MIO_EMM_BUF_IDX())
+		return "MIO_EMM_BUF_IDX";
+	if (reg == MIO_EMM_BUF_DAT())
+		return "MIO_EMM_BUF_DAT";
+	if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		if (reg == MIO_EMM_CALB())
+			return "MIO_EMM_CALB";
+		if (reg == MIO_EMM_TAP())
+			return "MIO_EMM_TAP";
+		if (reg == MIO_EMM_TIMING())
+			return "MIO_EMM_TIMING";
+		if (reg == MIO_EMM_DEBUG())
+			return "MIO_EMM_DEBUG";
+	}
+
+	return "UNKNOWN";
+}
+#endif
+
+static void octeontx_print_rsp_sts(struct mmc *mmc)
+{
+#ifdef DEBUG
+	union mio_emm_rsp_sts emm_rsp_sts;
+	const struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	static const char * const ctype_xor_str[] = {
+		"No data",
+		"Read data into Dbuf",
+		"Write data from Dbuf",
+		"Reserved"
+	};
+
+	static const char * const rtype_xor_str[] = {
+		"No response",
+		"R1, 48 bits",
+		"R2, 136 bits",
+		"R3, 48 bits",
+		"R4, 48 bits",
+		"R5, 48 bits",
+		"Reserved 6",
+		"Reserved 7"
+	};
+
+	emm_rsp_sts.u = readq(host->base_addr + MIO_EMM_RSP_STS());
+	printf("\nMIO_EMM_RSP_STS:              0x%016llx\n", emm_rsp_sts.u);
+	printf("    60-61: bus_id:              %u\n", emm_rsp_sts.s.bus_id);
+	printf("    59:    cmd_val:             %s\n",
+	       emm_rsp_sts.s.cmd_val ? "yes" : "no");
+	printf("    58:    switch_val:          %s\n",
+	       emm_rsp_sts.s.switch_val ? "yes" : "no");
+	printf("    57:    dma_val:             %s\n",
+	       emm_rsp_sts.s.dma_val ? "yes" : "no");
+	printf("    56:    dma_pend:            %s\n",
+	       emm_rsp_sts.s.dma_pend ? "yes" : "no");
+	printf("    28:    dbuf_err:            %s\n",
+	       emm_rsp_sts.s.dbuf_err ? "yes" : "no");
+	printf("    23:    dbuf:                %u\n", emm_rsp_sts.s.dbuf);
+	printf("    22:    blk_timeout:         %s\n",
+	       emm_rsp_sts.s.blk_timeout ? "yes" : "no");
+	printf("    21:    blk_crc_err:         %s\n",
+	       emm_rsp_sts.s.blk_crc_err ? "yes" : "no");
+	printf("    20:    rsp_busybit:         %s\n",
+	       emm_rsp_sts.s.rsp_busybit ? "yes" : "no");
+	printf("    19:    stp_timeout:         %s\n",
+	       emm_rsp_sts.s.stp_timeout ? "yes" : "no");
+	printf("    18:    stp_crc_err:         %s\n",
+	       emm_rsp_sts.s.stp_crc_err ? "yes" : "no");
+	printf("    17:    stp_bad_sts:         %s\n",
+	       emm_rsp_sts.s.stp_bad_sts ? "yes" : "no");
+	printf("    16:    stp_val:             %s\n",
+	       emm_rsp_sts.s.stp_val ? "yes" : "no");
+	printf("    15:    rsp_timeout:         %s\n",
+	       emm_rsp_sts.s.rsp_timeout ? "yes" : "no");
+	printf("    14:    rsp_crc_err:         %s\n",
+	       emm_rsp_sts.s.rsp_crc_err ? "yes" : "no");
+	printf("    13:    rsp_bad_sts:         %s\n",
+	       emm_rsp_sts.s.rsp_bad_sts ? "yes" : "no");
+	printf("    12:    rsp_val:             %s\n",
+	       emm_rsp_sts.s.rsp_val ? "yes" : "no");
+	printf("    9-11:  rsp_type:            %s\n",
+	       rtype_xor_str[emm_rsp_sts.s.rsp_type]);
+	printf("    7-8:   cmd_type:            %s\n",
+	       ctype_xor_str[emm_rsp_sts.s.cmd_type]);
+	printf("    1-6:   cmd_idx:             %u\n",
+	       emm_rsp_sts.s.cmd_idx);
+	printf("    0:     cmd_done:            %s\n",
+	       emm_rsp_sts.s.cmd_done ? "yes" : "no");
+#endif
+}
+
+static inline u64 read_csr(struct mmc *mmc, u64 reg)
+{
+	const struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	u64 value = readq(host->base_addr + reg);
+#ifdef DEBUG_CSR
+	printf("        %s: %s(0x%p) => 0x%llx\n", __func__,
+	       mmc_reg_str(reg), host->base_addr + reg,
+	       value);
+#endif
+	return value;
+}
+
+/**
+ * Writes to a CSR register
+ *
+ * @param[in]	mmc	pointer to mmc data structure
+ * @param	reg	register offset
+ * @param	value	value to write to register
+ */
+static inline void write_csr(struct mmc *mmc, u64 reg, u64 value)
+{
+	const struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	void *addr = host->base_addr + reg;
+
+#ifdef DEBUG_CSR
+	printf("        %s: %s(0x%p) <= 0x%llx\n", __func__, mmc_reg_str(reg),
+	       addr, value);
+#endif
+	writeq(value, addr);
+}
+
+#ifdef DEBUG
+static void mmc_print_status(u32 status)
+{
+#ifdef DEBUG_STATUS
+	static const char * const state[] = {
+		"Idle",		/* 0 */
+		"Ready",	/* 1 */
+		"Ident",	/* 2 */
+		"Standby",	/* 3 */
+		"Tran",		/* 4 */
+		"Data",		/* 5 */
+		"Receive",	/* 6 */
+		"Program",	/* 7 */
+		"Dis",		/* 8 */
+		"Btst",		/* 9 */
+		"Sleep",	/* 10 */
+		"reserved",	/* 11 */
+		"reserved",	/* 12 */
+		"reserved",	/* 13 */
+		"reserved",	/* 14 */
+		"reserved"	/* 15 */ };
+	if (status & R1_APP_CMD)
+		puts("MMC ACMD\n");
+	if (status & R1_SWITCH_ERROR)
+		puts("MMC switch error\n");
+	if (status & R1_READY_FOR_DATA)
+		puts("MMC ready for data\n");
+	printf("MMC %s state\n", state[R1_CURRENT_STATE(status)]);
+	if (status & R1_ERASE_RESET)
+		puts("MMC erase reset\n");
+	if (status & R1_WP_ERASE_SKIP)
+		puts("MMC partial erase due to write protected blocks\n");
+	if (status & R1_CID_CSD_OVERWRITE)
+		puts("MMC CID/CSD overwrite error\n");
+	if (status & R1_ERROR)
+		puts("MMC undefined device error\n");
+	if (status & R1_CC_ERROR)
+		puts("MMC device error\n");
+	if (status & R1_CARD_ECC_FAILED)
+		puts("MMC internal ECC failed to correct data\n");
+	if (status & R1_ILLEGAL_COMMAND)
+		puts("MMC illegal command\n");
+	if (status & R1_COM_CRC_ERROR)
+		puts("MMC CRC of previous command failed\n");
+	if (status & R1_LOCK_UNLOCK_FAILED)
+		puts("MMC sequence or password error in lock/unlock device command\n");
+	if (status & R1_CARD_IS_LOCKED)
+		puts("MMC device locked by host\n");
+	if (status & R1_WP_VIOLATION)
+		puts("MMC attempt to program write protected block\n");
+	if (status & R1_ERASE_PARAM)
+		puts("MMC invalid selection of erase groups for erase\n");
+	if (status & R1_ERASE_SEQ_ERROR)
+		puts("MMC error in sequence of erase commands\n");
+	if (status & R1_BLOCK_LEN_ERROR)
+		puts("MMC block length error\n");
+	if (status & R1_ADDRESS_ERROR)
+		puts("MMC address misalign error\n");
+	if (status & R1_OUT_OF_RANGE)
+		puts("MMC address out of range\n");
+#endif
+}
+#endif
+
+/**
+ * Print out all of the register values where mmc is optional
+ *
+ * @param mmc	MMC device (can be NULL)
+ * @param host	Pointer to host data structure (can be NULL if mmc is !NULL)
+ */
+static void octeontx_mmc_print_registers2(struct mmc *mmc,
+					  struct octeontx_mmc_host *host)
+{
+	struct octeontx_mmc_slot *slot = mmc ? mmc->priv : NULL;
+	union mio_emm_dma_cfg emm_dma_cfg;
+	union mio_emm_dma_adr emm_dma_adr;
+	union mio_emm_dma_int emm_dma_int;
+	union mio_emm_cfg emm_cfg;
+	union mio_emm_modex emm_mode;
+	union mio_emm_switch emm_switch;
+	union mio_emm_dma emm_dma;
+	union mio_emm_cmd emm_cmd;
+	union mio_emm_rsp_sts emm_rsp_sts;
+	union mio_emm_rsp_lo emm_rsp_lo;
+	union mio_emm_rsp_hi emm_rsp_hi;
+	union mio_emm_int emm_int;
+	union mio_emm_wdog emm_wdog;
+	union mio_emm_sample emm_sample;
+	union mio_emm_calb emm_calb;
+	union mio_emm_tap emm_tap;
+	union mio_emm_timing emm_timing;
+	union mio_emm_io_ctl io_ctl;
+	union mio_emm_debug emm_debug;
+	union mio_emm_sts_mask emm_sts_mask;
+	union mio_emm_rca emm_rca;
+	int bus;
+
+	static const char * const bus_width_str[] = {
+		"1-bit data bus (power on)",
+		"4-bit data bus",
+		"8-bit data bus",
+		"reserved (3)",
+		"reserved (4)",
+		"4-bit data bus (dual data rate)",
+		"8-bit data bus (dual data rate)",
+		"reserved (7)",
+		"reserved (8)",
+		"invalid (9)",
+		"invalid (10)",
+		"invalid (11)",
+		"invalid (12)",
+		"invalid (13)",
+		"invalid (14)",
+		"invalid (15)",
+	};
+	static const char * const ctype_xor_str[] = {
+		"No data",
+		"Read data into Dbuf",
+		"Write data from Dbuf",
+		"Reserved"
+	};
+
+	static const char * const rtype_xor_str[] = {
+		"No response",
+		"R1, 48 bits",
+		"R2, 136 bits",
+		"R3, 48 bits",
+		"R4, 48 bits",
+		"R5, 48 bits",
+		"Reserved 6",
+		"Reserved 7"
+	};
+
+	if (!host && mmc)
+		host = mmc_to_host(mmc);
+
+	if (mmc)
+		printf("%s: bus id: %u\n", __func__, slot->bus_id);
+	emm_dma_cfg.u = readq(host->base_addr + MIO_EMM_DMA_CFG());
+	printf("MIO_EMM_DMA_CFG:                0x%016llx\n",
+	       emm_dma_cfg.u);
+	printf("    63:    en:                  %s\n",
+	       emm_dma_cfg.s.en ? "enabled" : "disabled");
+	printf("    62:    rw:                  %s\n",
+	       emm_dma_cfg.s.rw ? "write" : "read");
+	printf("    61:    clr:                 %s\n",
+	       emm_dma_cfg.s.clr ? "clear" : "not clear");
+	printf("    59:    swap32:              %s\n",
+	       emm_dma_cfg.s.swap32 ? "yes" : "no");
+	printf("    58:    swap16:              %s\n",
+	       emm_dma_cfg.s.swap16 ? "yes" : "no");
+	printf("    57:    swap8:               %s\n",
+	       emm_dma_cfg.s.swap8 ? "yes" : "no");
+	printf("    56:    endian:              %s\n",
+	       emm_dma_cfg.s.endian ? "little" : "big");
+	printf("    36-55: size:                %u\n",
+	       emm_dma_cfg.s.size);
+
+	emm_dma_adr.u = readq(host->base_addr + MIO_EMM_DMA_ADR());
+	printf("MIO_EMM_DMA_ADR:              0x%016llx\n", emm_dma_adr.u);
+	printf("    0-49:  adr:                 0x%llx\n",
+	       (u64)emm_dma_adr.s.adr);
+
+	emm_dma_int.u = readq(host->base_addr + MIO_EMM_DMA_INT());
+	printf("\nMIO_EMM_DMA_INT:              0x%016llx\n",
+	       emm_dma_int.u);
+	printf("    1:     FIFO:                %s\n",
+	       emm_dma_int.s.fifo ? "yes" : "no");
+	printf("    0:     Done:                %s\n",
+	       emm_dma_int.s.done ? "yes" : "no");
+		emm_cfg.u = readq(host->base_addr + MIO_EMM_CFG());
+
+	printf("\nMIO_EMM_CFG:                  0x%016llx\n",
+	       emm_cfg.u);
+	printf("    3:     bus_ena3:            %s\n",
+	       emm_cfg.s.bus_ena & 0x08 ? "yes" : "no");
+	printf("    2:     bus_ena2:            %s\n",
+	       emm_cfg.s.bus_ena & 0x04 ? "yes" : "no");
+	printf("    1:     bus_ena1:            %s\n",
+	       emm_cfg.s.bus_ena & 0x02 ? "yes" : "no");
+	printf("    0:     bus_ena0:            %s\n",
+	       emm_cfg.s.bus_ena & 0x01 ? "yes" : "no");
+	for (bus = 0; bus < 4; bus++) {
+		emm_mode.u = readq(host->base_addr + MIO_EMM_MODEX(bus));
+		printf("\nMIO_EMM_MODE%u:               0x%016llx\n",
+		       bus, emm_mode.u);
+		if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+			printf("    50:    hs400_timing:        %s\n",
+			       emm_mode.s.hs400_timing ? "yes" : "no");
+			printf("    49:    hs200_timing:        %s\n",
+			       emm_mode.s.hs200_timing ? "yes" : "no");
+		}
+		printf("    48:    hs_timing:           %s\n",
+		       emm_mode.s.hs_timing ? "yes" : "no");
+		printf("    40-42: bus_width:           %s\n",
+		       bus_width_str[emm_mode.s.bus_width]);
+		printf("    32-35: power_class          %u\n",
+		       emm_mode.s.power_class);
+		printf("    16-31: clk_hi:              %u\n",
+		       emm_mode.s.clk_hi);
+		printf("    0-15:  clk_lo:              %u\n",
+		       emm_mode.s.clk_lo);
+	}
+
+	emm_switch.u = readq(host->base_addr + MIO_EMM_SWITCH());
+	printf("\nMIO_EMM_SWITCH:               0x%016llx\n", emm_switch.u);
+	printf("    60-61: bus_id:              %u\n", emm_switch.s.bus_id);
+	printf("    59:    switch_exe:          %s\n",
+	       emm_switch.s.switch_exe ? "yes" : "no");
+	printf("    58:    switch_err0:         %s\n",
+	       emm_switch.s.switch_err0 ? "yes" : "no");
+	printf("    57:    switch_err1:         %s\n",
+	       emm_switch.s.switch_err1 ? "yes" : "no");
+	printf("    56:    switch_err2:         %s\n",
+	       emm_switch.s.switch_err2 ? "yes" : "no");
+	printf("    48:    hs_timing:           %s\n",
+	       emm_switch.s.hs_timing ? "yes" : "no");
+	printf("    42-40: bus_width:           %s\n",
+	       bus_width_str[emm_switch.s.bus_width]);
+	printf("    32-35: power_class:         %u\n",
+	       emm_switch.s.power_class);
+	printf("    16-31: clk_hi:              %u\n",
+	       emm_switch.s.clk_hi);
+	printf("    0-15:  clk_lo:              %u\n", emm_switch.s.clk_lo);
+
+	emm_dma.u = readq(host->base_addr + MIO_EMM_DMA());
+	printf("\nMIO_EMM_DMA:                  0x%016llx\n", emm_dma.u);
+	printf("    60-61: bus_id:              %u\n", emm_dma.s.bus_id);
+	printf("    59:    dma_val:             %s\n",
+	       emm_dma.s.dma_val ? "yes" : "no");
+	printf("    58:    sector:              %s mode\n",
+	       emm_dma.s.sector ? "sector" : "byte");
+	printf("    57:    dat_null:            %s\n",
+	       emm_dma.s.dat_null ? "yes" : "no");
+	printf("    51-56: thres:               %u\n", emm_dma.s.thres);
+	printf("    50:    rel_wr:              %s\n",
+	       emm_dma.s.rel_wr ? "yes" : "no");
+	printf("    49:    rw:                  %s\n",
+	       emm_dma.s.rw ? "write" : "read");
+	printf("    48:    multi:               %s\n",
+	       emm_dma.s.multi ? "yes" : "no");
+	printf("    32-47: block_cnt:           %u\n",
+	       emm_dma.s.block_cnt);
+	printf("    0-31:  card_addr:           0x%x\n",
+	       emm_dma.s.card_addr);
+
+	emm_cmd.u = readq(host->base_addr + MIO_EMM_CMD());
+	printf("\nMIO_EMM_CMD:                  0x%016llx\n", emm_cmd.u);
+	printf("\n  62:    skip_busy:           %s\n",
+	       emm_cmd.s.skip_busy ? "yes" : "no");
+	printf("    60-61: bus_id:              %u\n", emm_cmd.s.bus_id);
+	printf("    59:    cmd_val:             %s\n",
+	       emm_cmd.s.cmd_val ? "yes" : "no");
+	printf("    55:    dbuf:                %u\n", emm_cmd.s.dbuf);
+	printf("    49-54: offset:              %u\n", emm_cmd.s.offset);
+	printf("    41-42: ctype_xor:           %s\n",
+	       ctype_xor_str[emm_cmd.s.ctype_xor]);
+	printf("    38-40: rtype_xor:           %s\n",
+	       rtype_xor_str[emm_cmd.s.rtype_xor]);
+	printf("    32-37: cmd_idx:             %u\n", emm_cmd.s.cmd_idx);
+	printf("    0-31:  arg:                 0x%x\n", emm_cmd.s.arg);
+
+	emm_rsp_sts.u = readq(host->base_addr + MIO_EMM_RSP_STS());
+	printf("\nMIO_EMM_RSP_STS:              0x%016llx\n", emm_rsp_sts.u);
+	printf("    60-61: bus_id:              %u\n", emm_rsp_sts.s.bus_id);
+	printf("    59:    cmd_val:             %s\n",
+	       emm_rsp_sts.s.cmd_val ? "yes" : "no");
+	printf("    58:    switch_val:          %s\n",
+	       emm_rsp_sts.s.switch_val ? "yes" : "no");
+	printf("    57:    dma_val:             %s\n",
+	       emm_rsp_sts.s.dma_val ? "yes" : "no");
+	printf("    56:    dma_pend:            %s\n",
+	       emm_rsp_sts.s.dma_pend ? "yes" : "no");
+	printf("    28:    dbuf_err:            %s\n",
+	       emm_rsp_sts.s.dbuf_err ? "yes" : "no");
+	printf("    23:    dbuf:                %u\n", emm_rsp_sts.s.dbuf);
+	printf("    22:    blk_timeout:         %s\n",
+	       emm_rsp_sts.s.blk_timeout ? "yes" : "no");
+	printf("    21:    blk_crc_err:         %s\n",
+	       emm_rsp_sts.s.blk_crc_err ? "yes" : "no");
+	printf("    20:    rsp_busybit:         %s\n",
+	       emm_rsp_sts.s.rsp_busybit ? "yes" : "no");
+	printf("    19:    stp_timeout:         %s\n",
+	       emm_rsp_sts.s.stp_timeout ? "yes" : "no");
+	printf("    18:    stp_crc_err:         %s\n",
+	       emm_rsp_sts.s.stp_crc_err ? "yes" : "no");
+	printf("    17:    stp_bad_sts:         %s\n",
+	       emm_rsp_sts.s.stp_bad_sts ? "yes" : "no");
+	printf("    16:    stp_val:             %s\n",
+	       emm_rsp_sts.s.stp_val ? "yes" : "no");
+	printf("    15:    rsp_timeout:         %s\n",
+	       emm_rsp_sts.s.rsp_timeout ? "yes" : "no");
+	printf("    14:    rsp_crc_err:         %s\n",
+	       emm_rsp_sts.s.rsp_crc_err ? "yes" : "no");
+	printf("    13:    rsp_bad_sts:         %s\n",
+	       emm_rsp_sts.s.rsp_bad_sts ? "yes" : "no");
+	printf("    12:    rsp_val:             %s\n",
+	       emm_rsp_sts.s.rsp_val ? "yes" : "no");
+	printf("    9-11:  rsp_type:            %s\n",
+	       rtype_xor_str[emm_rsp_sts.s.rsp_type]);
+	printf("    7-8:   cmd_type:            %s\n",
+	       ctype_xor_str[emm_rsp_sts.s.cmd_type]);
+	printf("    1-6:   cmd_idx:             %u\n",
+	       emm_rsp_sts.s.cmd_idx);
+	printf("    0:     cmd_done:            %s\n",
+	       emm_rsp_sts.s.cmd_done ? "yes" : "no");
+
+	emm_rsp_lo.u = readq(host->base_addr + MIO_EMM_RSP_LO());
+	printf("\nMIO_EMM_RSP_STS_LO:           0x%016llx\n", emm_rsp_lo.u);
+
+	emm_rsp_hi.u = readq(host->base_addr + MIO_EMM_RSP_HI());
+	printf("\nMIO_EMM_RSP_STS_HI:           0x%016llx\n", emm_rsp_hi.u);
+
+	emm_int.u = readq(host->base_addr + MIO_EMM_INT());
+	printf("\nMIO_EMM_INT:                  0x%016llx\n", emm_int.u);
+	printf("    6:    switch_err:           %s\n",
+	       emm_int.s.switch_err ? "yes" : "no");
+	printf("    5:    switch_done:          %s\n",
+	       emm_int.s.switch_done ? "yes" : "no");
+	printf("    4:    dma_err:              %s\n",
+	       emm_int.s.dma_err ? "yes" : "no");
+	printf("    3:    cmd_err:              %s\n",
+	       emm_int.s.cmd_err ? "yes" : "no");
+	printf("    2:    dma_done:             %s\n",
+	       emm_int.s.dma_done ? "yes" : "no");
+	printf("    1:    cmd_done:             %s\n",
+	       emm_int.s.cmd_done ? "yes" : "no");
+	printf("    0:    buf_done:             %s\n",
+	       emm_int.s.buf_done ? "yes" : "no");
+
+	emm_wdog.u = readq(host->base_addr + MIO_EMM_WDOG());
+	printf("\nMIO_EMM_WDOG:                 0x%016llx (%u)\n",
+	       emm_wdog.u, emm_wdog.s.clk_cnt);
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		emm_sample.u = readq(host->base_addr + MIO_EMM_SAMPLE());
+		printf("\nMIO_EMM_SAMPLE:               0x%016llx\n",
+		       emm_sample.u);
+		printf("    16-25: cmd_cnt:             %u\n",
+		       emm_sample.s.cmd_cnt);
+		printf("    0-9:   dat_cnt:             %u\n",
+		       emm_sample.s.dat_cnt);
+	}
+
+	emm_sts_mask.u = readq(host->base_addr + MIO_EMM_STS_MASK());
+	printf("\nMIO_EMM_STS_MASK:             0x%016llx\n", emm_sts_mask.u);
+
+	emm_rca.u = readq(host->base_addr + MIO_EMM_RCA());
+	printf("\nMIO_EMM_RCA:                  0x%016llx\n", emm_rca.u);
+	printf("    0-15:  card_rca:            0x%04x\n",
+	       emm_rca.s.card_rca);
+	if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		emm_calb.u = readq(host->base_addr + MIO_EMM_CALB());
+		printf("\nMIO_EMM_CALB:                 0x%016llx\n",
+		       emm_calb.u);
+		printf("       0:  start:               %u\n",
+		       emm_calb.s.start);
+		emm_tap.u = readq(host->base_addr + MIO_EMM_TAP());
+		printf("\nMIO_EMM_TAP:                  0x%016llx\n",
+		       emm_tap.u);
+		printf("     7-0:  delay:               %u\n", emm_tap.s.delay);
+		emm_timing.u = readq(host->base_addr + MIO_EMM_TIMING());
+		printf("\nMIO_EMM_TIMING:               0x%016llx\n",
+		       emm_timing.u);
+		printf("   53-48:  cmd_in_tap:          %u\n",
+		       emm_timing.s.cmd_in_tap);
+		printf("   37-32:  cmd_out_tap:         %u\n",
+		       emm_timing.s.cmd_out_tap);
+		printf("   21-16:  data_in_tap:         %u\n",
+		       emm_timing.s.data_in_tap);
+		printf("     5-0:  data_out_tap:        %u\n",
+		       emm_timing.s.data_out_tap);
+		io_ctl.u = readq(host->base_addr + MIO_EMM_IO_CTL());
+		printf("\nMIO_IO_CTL:                   0x%016llx\n", io_ctl.u);
+		printf("     3-2:  drive:               %u (%u mA)\n",
+		       io_ctl.s.drive, 2 << io_ctl.s.drive);
+		printf("       0:  slew:                %u %s\n", io_ctl.s.slew,
+		       io_ctl.s.slew ? "high" : "low");
+		emm_debug.u = readq(host->base_addr + MIO_EMM_DEBUG());
+		printf("\nMIO_EMM_DEBUG:                0x%016llx\n",
+		       emm_debug.u);
+		printf("      21: rdsync_rst            0x%x\n",
+		       emm_debug.s.rdsync_rst);
+		printf("      20: emmc_clk_disable      0x%x\n",
+		       emm_debug.s.emmc_clk_disable);
+		printf("   19-16: dma_sm:               0x%x\n",
+		       emm_debug.s.dma_sm);
+		printf("   15-12: data_sm:              0x%x\n",
+		       emm_debug.s.data_sm);
+		printf("    11-8: cmd_sm:               0x%x\n",
+		       emm_debug.s.cmd_sm);
+		printf("       0: clk_on:               0x%x\n",
+		       emm_debug.s.clk_on);
+	}
+
+	puts("\n");
+}
+
+/**
+ * Print out all of the register values
+ *
+ * @param mmc	MMC device
+ */
+static void octeontx_mmc_print_registers(struct mmc *mmc)
+{
+#ifdef DEBUG_REGISTERS
+	const int print = 1;
+#else
+	const int print = 0;
+#endif
+	if (print)
+		octeontx_mmc_print_registers2(mmc, mmc_to_host(mmc));
+}
+
+static const struct octeontx_sd_mods octeontx_cr_types[] = {
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD0 */
+{ {0, 3}, {0, 3}, {0, 0} },	/* CMD1 */
+{ {0, 2}, {0, 2}, {0, 0} },	/* CMD2 */
+{ {0, 1}, {0, 3}, {0, 0} },	/* CMD3 SD_CMD_SEND_RELATIVE_ADDR 0, 2 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD4 */
+{ {0, 1}, {0, 1}, {0, 0} },	/* CMD5 */
+{ {0, 1}, {1, 1}, {0, 1} },	/*
+				 * CMD6 SD_CMD_SWITCH_FUNC 1,0
+				 * (ACMD) SD_APP_SET_BUS_WIDTH
+				 */
+{ {0, 1}, {0, 1}, {0, 0} },	/* CMD7 */
+{ {1, 1}, {0, 3}, {0, 0} },	/* CMD8 SD_CMD_SEND_IF_COND 1,2 */
+{ {0, 2}, {0, 2}, {0, 0} },	/* CMD9 */
+{ {0, 2}, {0, 2}, {0, 0} },	/* CMD10 */
+{ {1, 1}, {0, 1}, {1, 1} },	/* CMD11 SD_CMD_SWITCH_UHS18V 1,0 */
+{ {0, 1}, {0, 1}, {0, 0} },	/* CMD12 */
+{ {0, 1}, {0, 1}, {1, 3} },	/* CMD13 (ACMD)) SD_CMD_APP_SD_STATUS 1,2 */
+{ {1, 1}, {1, 1}, {0, 0} },	/* CMD14 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD15 */
+{ {0, 1}, {0, 1}, {0, 0} },	/* CMD16 */
+{ {1, 1}, {1, 1}, {0, 0} },	/* CMD17 */
+{ {1, 1}, {1, 1}, {0, 0} },	/* CMD18 */
+{ {3, 1}, {3, 1}, {0, 0} },	/* CMD19 */
+{ {2, 1}, {0, 0}, {0, 0} },	/* CMD20 */	/* SD 2,0 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD21 */
+{ {0, 0}, {0, 0}, {1, 1} },	/* CMD22 (ACMD) SD_APP_SEND_NUM_WR_BLKS 1,0 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD23 */	/* SD ACMD 1,0 */
+{ {2, 1}, {2, 1}, {2, 1} },	/* CMD24 */
+{ {2, 1}, {2, 1}, {2, 1} },	/* CMD25 */
+{ {2, 1}, {2, 1}, {2, 1} },	/* CMD26 */
+{ {2, 1}, {2, 1}, {2, 1} },	/* CMD27 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD28 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD29 */
+{ {1, 1}, {1, 1}, {1, 1} },	/* CMD30 */
+{ {1, 1}, {1, 1}, {1, 1} },	/* CMD31 */
+{ {0, 0}, {0, 1}, {0, 0} },	/* CMD32 SD_CMD_ERASE_WR_BLK_START 0,1 */
+{ {0, 0}, {0, 1}, {0, 0} },	/* CMD33 SD_CMD_ERASE_WR_BLK_END 0,1 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD34 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD35 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD36 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD37 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD38 */
+{ {0, 4}, {0, 4}, {0, 4} },	/* CMD39 */
+{ {0, 5}, {0, 5}, {0, 5} },	/* CMD40 */
+{ {0, 0}, {0, 0}, {0, 3} },	/* CMD41 (ACMD) SD_CMD_APP_SEND_OP_COND 0,3 */
+{ {2, 1}, {2, 1}, {2, 1} },	/* CMD42 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD43 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD44 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD45 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD46 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD47 */
+{ {0, 0}, {1, 0}, {0, 0} },	/* CMD48 SD_CMD_READ_EXTR_SINGLE */
+{ {0, 0}, {2, 0}, {0, 0} },	/* CMD49 SD_CMD_WRITE_EXTR_SINGLE */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD50 */
+{ {0, 0}, {0, 0}, {1, 1} },	/* CMD51 (ACMD) SD_CMD_APP_SEND_SCR 1,1 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD52 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD53 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD54 */
+{ {0, 1}, {0, 1}, {0, 1} },	/* CMD55 */
+{ {0xff, 0xff}, {0xff, 0xff}, {0xff, 0xff} },	/* CMD56 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD57 */
+{ {0, 0}, {0, 3}, {0, 3} },	/* CMD58 SD_CMD_SPI_READ_OCR 0,3 */
+{ {0, 0}, {0, 1}, {0, 0} },	/* CMD59 SD_CMD_SPI_CRC_ON_OFF 0,1 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD60 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD61 */
+{ {0, 0}, {0, 0}, {0, 0} },	/* CMD62 */
+{ {0, 0}, {0, 0}, {0, 0} }	/* CMD63 */
+};
+
+/**
+ * Returns XOR values needed for SD commands and other quirks
+ *
+ * @param	mmc	mmc device
+ * @param	cmd	command information
+ *
+ * @return octeontx_mmc_cr_mods data structure with various quirks and flags
+ */
+static struct octeontx_mmc_cr_mods
+octeontx_mmc_get_cr_mods(struct mmc *mmc, const struct mmc_cmd *cmd,
+			 const struct mmc_data *data)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct octeontx_mmc_cr_mods cr = {0, 0};
+	const struct octeontx_sd_mods *sdm =
+					&octeontx_cr_types[cmd->cmdidx & 0x3f];
+	u8 c = sdm->mmc.c, r = sdm->mmc.r;
+	u8 desired_ctype = 0;
+
+	if (IS_MMC(mmc)) {
+#ifdef MMC_SUPPORTS_TUNING
+		if (cmd->cmdidx == MMC_CMD_SEND_TUNING_BLOCK_HS200) {
+			if (cmd->resp_type == MMC_RSP_R1)
+				cr.rtype_xor = 1;
+			if (data && data->flags & MMC_DATA_READ)
+				cr.ctype_xor = 1;
+		}
+#endif
+		return cr;
+	}
+
+	if (cmd->cmdidx == 56)
+		c = (cmd->cmdarg & 1) ? 1 : 2;
+
+	if (data) {
+		if (data->flags & MMC_DATA_READ)
+			desired_ctype = 1;
+		else if (data->flags & MMC_DATA_WRITE)
+			desired_ctype = 2;
+	}
+
+	cr.ctype_xor = c ^ desired_ctype;
+	if (slot->is_acmd)
+		cr.rtype_xor = r ^ sdm->sdacmd.r;
+	else
+		cr.rtype_xor = r ^ sdm->sd.r;
+
+	debug("%s(%s): mmc c: %d, mmc r: %d, desired c: %d, xor c: %d, xor r: %d\n",
+	      __func__, mmc->dev->name, c, r, desired_ctype,
+	      cr.ctype_xor, cr.rtype_xor);
+	return cr;
+}
+
+/**
+ * Keep track of switch commands internally
+ */
+static void octeontx_mmc_track_switch(struct mmc *mmc, u32 cmd_arg)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	u8 how = (cmd_arg >> 24) & 3;
+	u8 where = (u8)(cmd_arg >> 16);
+	u8 val = (u8)(cmd_arg >> 8);
+
+	slot->want_switch = slot->cached_switch;
+
+	if (slot->is_acmd)
+		return;
+
+	if (how != 3)
+		return;
+
+	switch (where) {
+	case EXT_CSD_BUS_WIDTH:
+		slot->want_switch.s.bus_width = val;
+		break;
+	case EXT_CSD_POWER_CLASS:
+		slot->want_switch.s.power_class = val;
+		break;
+	case EXT_CSD_HS_TIMING:
+		slot->want_switch.s.hs_timing = 0;
+		slot->want_switch.s.hs200_timing = 0;
+		slot->want_switch.s.hs400_timing = 0;
+		switch (val & 0xf) {
+		case 0:
+			break;
+		case 1:
+			slot->want_switch.s.hs_timing = 1;
+			break;
+		case 2:
+			if (!slot->is_asim && !slot->is_emul)
+				slot->want_switch.s.hs200_timing = 1;
+			break;
+		case 3:
+			if (!slot->is_asim && !slot->is_emul)
+				slot->want_switch.s.hs400_timing = 1;
+			break;
+		default:
+			pr_err("%s(%s): Unsupported timing mode 0x%x\n",
+			       __func__, mmc->dev->name, val & 0xf);
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static int octeontx_mmc_print_rsp_errors(struct mmc *mmc,
+					 union mio_emm_rsp_sts rsp_sts)
+{
+	bool err = false;
+	const char *name = mmc->dev->name;
+
+	if (rsp_sts.s.acc_timeout) {
+		pr_warn("%s(%s): acc_timeout\n", __func__, name);
+		err = true;
+	}
+	if (rsp_sts.s.dbuf_err) {
+		pr_warn("%s(%s): dbuf_err\n", __func__, name);
+		err = true;
+	}
+	if (rsp_sts.s.blk_timeout) {
+		pr_warn("%s(%s): blk_timeout\n", __func__, name);
+		err = true;
+	}
+	if (rsp_sts.s.blk_crc_err) {
+		pr_warn("%s(%s): blk_crc_err\n", __func__, name);
+		err = true;
+	}
+	if (rsp_sts.s.stp_timeout) {
+		pr_warn("%s(%s): stp_timeout\n", __func__, name);
+		err = true;
+	}
+	if (rsp_sts.s.stp_crc_err) {
+		pr_warn("%s(%s): stp_crc_err\n", __func__, name);
+		err = true;
+	}
+	if (rsp_sts.s.stp_bad_sts) {
+		pr_warn("%s(%s): stp_bad_sts\n", __func__, name);
+		err = true;
+	}
+	if (err)
+		pr_warn("  rsp_sts: 0x%llx\n", rsp_sts.u);
+
+	return err ? -1 : 0;
+}
+
+/**
+ * Starts a DMA operation for block read/write
+ *
+ * @param	mmc	mmc device
+ * @param	write	true if write operation
+ * @param	clear	true to clear DMA operation
+ * @param	adr	source or destination DMA address
+ * @param	size	size in blocks
+ * @param	timeout	timeout in ms
+ */
+static void octeontx_mmc_start_dma(struct mmc *mmc, bool write,
+				   bool clear, u32 block, dma_addr_t adr,
+				   u32 size, int timeout)
+{
+	const struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	union mio_emm_dma_cfg emm_dma_cfg;
+	union mio_emm_dma_adr emm_dma_adr;
+	union mio_emm_dma emm_dma;
+
+	/* Clear any interrupts */
+	write_csr(mmc, MIO_EMM_DMA_INT(),
+		  read_csr(mmc, MIO_EMM_DMA_INT()));
+
+	emm_dma_cfg.u = 0;
+	emm_dma_cfg.s.en = 1;
+	emm_dma_cfg.s.rw = !!write;
+	emm_dma_cfg.s.clr = !!clear;
+	emm_dma_cfg.s.size = ((u64)(size * mmc->read_bl_len) / 8) - 1;
+#if __BYTE_ORDER != __BIG_ENDIAN
+	emm_dma_cfg.s.endian = 1;
+#endif
+	emm_dma_adr.u = 0;
+	emm_dma_adr.s.adr = adr;
+	write_csr(mmc, MIO_EMM_DMA_ADR(), emm_dma_adr.u);
+	write_csr(mmc, MIO_EMM_DMA_CFG(), emm_dma_cfg.u);
+
+	emm_dma.u = 0;
+	emm_dma.s.bus_id = slot->bus_id;
+	emm_dma.s.dma_val = 1;
+	emm_dma.s.rw = !!write;
+	emm_dma.s.sector = mmc->high_capacity ? 1 : 0;
+
+	if (size > 1 && ((IS_SD(mmc) && (mmc->scr[0] & 2)) || !IS_SD(mmc)))
+		emm_dma.s.multi = 1;
+	else
+		emm_dma.s.multi = 0;
+
+	emm_dma.s.block_cnt = size;
+	if (!mmc->high_capacity)
+		block *= mmc->read_bl_len;
+	emm_dma.s.card_addr = block;
+	debug("%s(%s): card address: 0x%x, size: %d, multi: %d\n",
+	      __func__, mmc->dev->name, block, size, emm_dma.s.multi);
+
+	if (timeout > 0)
+		timeout = (timeout * 1000) - 1000;
+	set_wdog(mmc, timeout);
+
+	debug("  Writing 0x%llx to mio_emm_dma\n", emm_dma.u);
+	write_csr(mmc, MIO_EMM_DMA(), emm_dma.u);
+}
+
+/**
+ * Waits for a DMA operation to complete
+ *
+ * @param	mmc	mmc device
+ * @param	timeout	timeout in ms
+ *
+ * @return	0 for success (could be DMA errors), -ETIMEDOUT on timeout
+ */
+
+/**
+ * Cleanup DMA engine after a failure
+ *
+ * @param	mmc	mmc device
+ * @param	rsp_sts	rsp status
+ */
+static void octeontx_mmc_cleanup_dma(struct mmc *mmc,
+				     union mio_emm_rsp_sts rsp_sts)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	union mio_emm_dma emm_dma;
+	ulong start;
+	int retries = 3;
+
+	do {
+		debug("%s(%s): rsp_sts: 0x%llx, rsp_lo: 0x%llx, dma_int: 0x%llx\n",
+		      __func__, mmc->dev->name, rsp_sts.u,
+		      read_csr(mmc, MIO_EMM_RSP_LO()),
+		      read_csr(mmc, MIO_EMM_DMA_INT()));
+		emm_dma.u = read_csr(mmc, MIO_EMM_DMA());
+		emm_dma.s.dma_val = 1;
+		emm_dma.s.dat_null = 1;
+		emm_dma.s.bus_id = slot->bus_id;
+		write_csr(mmc, MIO_EMM_DMA(), emm_dma.u);
+		start = get_timer(0);
+		do {
+			rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+			WATCHDOG_RESET();
+		} while (get_timer(start) < 100 &&
+			 (rsp_sts.s.dma_val || rsp_sts.s.dma_pend));
+	} while (retries-- >= 0 && rsp_sts.s.dma_pend);
+	if (rsp_sts.s.dma_val)
+		pr_err("%s(%s): Error: could not clean up DMA.  RSP_STS: 0x%llx, RSP_LO: 0x%llx\n",
+		       __func__, mmc->dev->name, rsp_sts.u,
+		       read_csr(mmc, MIO_EMM_RSP_LO()));
+	debug("  rsp_sts after clearing up DMA: 0x%llx\n",
+	      read_csr(mmc, MIO_EMM_RSP_STS()));
+}
+
+/**
+ * Waits for a DMA operation to complete
+ *
+ * @param	mmc	mmc device
+ * @param	timeout	timeout in ms
+ * @param	verbose	true to print out error information
+ *
+ * @return	0 for success (could be DMA errors), -ETIMEDOUT on timeout
+ *		or -EIO if IO error.
+ */
+static int octeontx_mmc_wait_dma(struct mmc *mmc, bool write, ulong timeout,
+				 bool verbose)
+{
+	struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	ulong start_time = get_timer(0);
+	union mio_emm_dma_int emm_dma_int;
+	union mio_emm_rsp_sts rsp_sts;
+	union mio_emm_dma emm_dma;
+	bool timed_out = false;
+	bool err = false;
+
+	debug("%s(%s, %lu, %d), delay: %uus\n", __func__, mmc->dev->name,
+	      timeout, verbose, host->dma_wait_delay);
+
+	udelay(host->dma_wait_delay);
+	do {
+		emm_dma_int.u = read_csr(mmc, MIO_EMM_DMA_INT());
+		rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+		if (write) {
+			if ((rsp_sts.s.dma_pend && !rsp_sts.s.dma_val) ||
+			    rsp_sts.s.blk_timeout ||
+			    rsp_sts.s.stp_timeout ||
+			    rsp_sts.s.rsp_timeout) {
+				err = true;
+#ifdef DEBUG
+				debug("%s: f1\n", __func__);
+				octeontx_mmc_print_rsp_errors(mmc, rsp_sts);
+#endif
+				break;
+			}
+		} else {
+			if (rsp_sts.s.blk_crc_err ||
+			    (rsp_sts.s.dma_pend && !rsp_sts.s.dma_val)) {
+				err = true;
+#if defined(DEBUG)
+				octeontx_mmc_print_rsp_errors(mmc, rsp_sts);
+#endif
+				break;
+			}
+		}
+		if (rsp_sts.s.dma_pend) {
+			/*
+			 * If this is set then an error has occurred.
+			 * Try and restart the DMA operation.
+			 */
+			emm_dma.u = read_csr(mmc, MIO_EMM_DMA());
+			if (verbose) {
+				pr_err("%s(%s): DMA pending error: rsp_sts: 0x%llx, dma_int: 0x%llx, emm_dma: 0x%llx\n",
+				       __func__, mmc->dev->name, rsp_sts.u,
+				       emm_dma_int.u, emm_dma.u);
+				octeontx_print_rsp_sts(mmc);
+				debug("  MIO_EMM_DEBUG: 0x%llx\n",
+				      read_csr(mmc, MIO_EMM_DEBUG()));
+				pr_err("%s: Trying DMA resume...\n", __func__);
+			}
+			emm_dma.s.dma_val = 1;
+			emm_dma.s.dat_null = 1;
+			write_csr(mmc, MIO_EMM_DMA(), emm_dma.u);
+			udelay(10);
+		} else if (!rsp_sts.s.dma_val && emm_dma_int.s.done) {
+			break;
+		}
+		WATCHDOG_RESET();
+		timed_out = (get_timer(start_time) > timeout);
+	} while (!timed_out);
+
+	if (timed_out || err) {
+		if (verbose) {
+			pr_err("%s(%s): MMC DMA %s after %lu ms, rsp_sts: 0x%llx, dma_int: 0x%llx, rsp_sts_lo: 0x%llx, emm_dma: 0x%llx\n",
+			       __func__, mmc->dev->name,
+			       timed_out ? "timed out" : "error",
+			       get_timer(start_time), rsp_sts.u,
+			       emm_dma_int.u,
+			       read_csr(mmc, MIO_EMM_RSP_LO()),
+			       read_csr(mmc, MIO_EMM_DMA()));
+			octeontx_print_rsp_sts(mmc);
+		}
+		if (rsp_sts.s.dma_pend)
+			octeontx_mmc_cleanup_dma(mmc, rsp_sts);
+	} else {
+		write_csr(mmc, MIO_EMM_DMA_INT(),
+			  read_csr(mmc, MIO_EMM_DMA_INT()));
+	}
+
+	return timed_out ? -ETIMEDOUT : (err ? -EIO : 0);
+}
+
+/**
+ * Read blocks from the MMC/SD device
+ *
+ * @param	mmc	mmc device
+ * @param	cmd	command
+ * @param	data	data for read
+ * @param	verbose	true to print out error information
+ *
+ * @return	number of blocks read or 0 if error
+ */
+static int octeontx_mmc_read_blocks(struct mmc *mmc, struct mmc_cmd *cmd,
+				    struct mmc_data *data, bool verbose)
+{
+	struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	union mio_emm_rsp_sts rsp_sts;
+	dma_addr_t dma_addr = (dma_addr_t)dm_pci_virt_to_mem(host->dev,
+							     data->dest);
+	ulong count;
+	ulong blkcnt = data->blocks;
+	ulong start = cmd->cmdarg;
+	int timeout = 1000 + blkcnt * 20;
+	bool timed_out = false;
+	bool multi_xfer = cmd->cmdidx == MMC_CMD_READ_MULTIPLE_BLOCK;
+
+	debug("%s(%s): dest: %p, dma address: 0x%llx, blkcnt: %lu, start: %lu\n",
+	      __func__, mmc->dev->name, data->dest, dma_addr, blkcnt, start);
+	debug("%s: rsp_sts: 0x%llx\n", __func__,
+	      read_csr(mmc, MIO_EMM_RSP_STS()));
+	/* use max timeout for multi-block transfers */
+	/* timeout = 0; */
+
+	/*
+	 * If we have a valid SD card in the slot, we set the response bit
+	 * mask to check for CRC errors and timeouts only.
+	 * Otherwise, use the default power on reset value.
+	 */
+	write_csr(mmc, MIO_EMM_STS_MASK(),
+		  IS_SD(mmc) ? 0x00b00000ull : 0xe4390080ull);
+	invalidate_dcache_range((u64)data->dest,
+				(u64)data->dest + blkcnt * data->blocksize);
+
+	if (multi_xfer) {
+		octeontx_mmc_start_dma(mmc, false, false, start, dma_addr,
+				       blkcnt, timeout);
+		timed_out = !!octeontx_mmc_wait_dma(mmc, false, timeout,
+						    verbose);
+		rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+		if (timed_out || rsp_sts.s.dma_val || rsp_sts.s.dma_pend) {
+			if (!verbose)
+				return 0;
+
+			pr_err("%s(%s): Error: DMA timed out.  rsp_sts: 0x%llx, emm_int: 0x%llx, dma_int: 0x%llx, rsp_lo: 0x%llx\n",
+			       __func__, mmc->dev->name, rsp_sts.u,
+			       read_csr(mmc, MIO_EMM_INT()),
+			       read_csr(mmc, MIO_EMM_DMA_INT()),
+			       read_csr(mmc, MIO_EMM_RSP_LO()));
+			pr_err("%s: block count: %lu, start: 0x%lx\n",
+			       __func__, blkcnt, start);
+			octeontx_mmc_print_registers(mmc);
+			return 0;
+		}
+	} else {
+		count = blkcnt;
+		timeout = 1000;
+		do {
+			octeontx_mmc_start_dma(mmc, false, false, start,
+					       dma_addr, 1, timeout);
+			dma_addr += mmc->read_bl_len;
+			start++;
+
+			timed_out = !!octeontx_mmc_wait_dma(mmc, false,
+							    timeout, verbose);
+			rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+			if (timed_out || rsp_sts.s.dma_val ||
+			    rsp_sts.s.dma_pend) {
+				if (verbose) {
+					pr_err("%s: Error: DMA timed out.  rsp_sts: 0x%llx, emm_int: 0x%llx, dma_int: 0x%llx, rsp_lo: 0x%llx\n",
+					       __func__, rsp_sts.u,
+					       read_csr(mmc, MIO_EMM_INT()),
+					       read_csr(mmc, MIO_EMM_DMA_INT()),
+					       read_csr(mmc, MIO_EMM_RSP_LO()));
+					pr_err("%s: block count: 1, start: 0x%lx\n",
+					       __func__, start);
+					octeontx_mmc_print_registers(mmc);
+				}
+				return blkcnt - count;
+			}
+			WATCHDOG_RESET();
+		} while (--count);
+	}
+#ifdef DEBUG
+	debug("%s(%s): Read %lu (0x%lx) blocks starting at block %u (0x%x) to address %p (dma address 0x%llx)\n",
+	      __func__, mmc->dev->name, blkcnt, blkcnt,
+	      cmd->cmdarg, cmd->cmdarg, data->dest,
+	      dm_pci_virt_to_mem(host->dev, data->dest));
+	print_buffer(0, data->dest, 1, 0x200, 0);
+#endif
+	return blkcnt;
+}
+
+static int octeontx_mmc_poll_ready(struct mmc *mmc, ulong timeout)
+{
+	ulong start;
+	struct mmc_cmd cmd;
+	int err;
+	bool not_ready = false;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cmdidx = MMC_CMD_SEND_STATUS;
+	cmd.cmdarg = mmc->rca << 16;
+	cmd.resp_type = MMC_RSP_R1;
+	start = get_timer(0);
+	do {
+		err = octeontx_mmc_send_cmd(mmc, &cmd, NULL);
+		if (err) {
+			pr_err("%s(%s): MMC command error: %d; Retry...\n",
+			       __func__, mmc->dev->name, err);
+			not_ready = true;
+		} else if (cmd.response[0] & R1_READY_FOR_DATA) {
+			return 0;
+		}
+		WATCHDOG_RESET();
+	} while (get_timer(start) < timeout);
+
+	if (not_ready)
+		pr_err("%s(%s): MMC command error; Retry timeout\n",
+		       __func__, mmc->dev->name);
+	return -ETIMEDOUT;
+}
+
+static ulong octeontx_mmc_write_blocks(struct mmc *mmc, struct mmc_cmd *cmd,
+				       struct mmc_data *data)
+{
+	struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	ulong start = cmd->cmdarg;
+	ulong blkcnt = data->blocks;
+	dma_addr_t dma_addr;
+	union mio_emm_rsp_sts rsp_sts;
+	union mio_emm_sts_mask emm_sts_mask;
+	ulong timeout;
+	int count;
+	bool timed_out = false;
+	bool multi_xfer = (blkcnt > 1) &&
+			((IS_SD(mmc) && mmc->scr[0] & 2) || !IS_SD(mmc));
+
+	octeontx_mmc_switch_to(mmc);
+	emm_sts_mask.u = 0;
+	emm_sts_mask.s.sts_msk = R1_BLOCK_WRITE_MASK;
+	write_csr(mmc, MIO_EMM_STS_MASK(), emm_sts_mask.u);
+
+	if (octeontx_mmc_poll_ready(mmc, 10000)) {
+		pr_err("%s(%s): Ready timed out\n", __func__, mmc->dev->name);
+		return 0;
+	}
+	flush_dcache_range((u64)data->src,
+			   (u64)data->src + blkcnt * mmc->write_bl_len);
+	dma_addr = (u64)dm_pci_virt_to_mem(host->dev, (void *)data->src);
+	if (multi_xfer) {
+		timeout = 5000 + 100 * blkcnt;
+		octeontx_mmc_start_dma(mmc, true, false, start, dma_addr,
+				       blkcnt, timeout);
+		timed_out = !!octeontx_mmc_wait_dma(mmc, true, timeout, true);
+		rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+		if (timed_out || rsp_sts.s.dma_val || rsp_sts.s.dma_pend) {
+			pr_err("%s(%s): Error: multi-DMA timed out after %lums.  rsp_sts: 0x%llx, emm_int: 0x%llx, emm_dma_int: 0x%llx, rsp_sts_lo: 0x%llx, emm_dma: 0x%llx\n",
+			       __func__, mmc->dev->name, timeout,
+			       rsp_sts.u,
+			       read_csr(mmc, MIO_EMM_INT()),
+			       read_csr(mmc, MIO_EMM_DMA_INT()),
+			       read_csr(mmc, MIO_EMM_RSP_LO()),
+			       read_csr(mmc, MIO_EMM_DMA()));
+			return 0;
+		}
+	} else {
+		timeout = 5000;
+		count = blkcnt;
+		do {
+			octeontx_mmc_start_dma(mmc, true, false, start,
+					       dma_addr, 1, timeout);
+			dma_addr += mmc->read_bl_len;
+			start++;
+
+			timed_out = !!octeontx_mmc_wait_dma(mmc, true, timeout,
+							    true);
+			rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+			if (timed_out || rsp_sts.s.dma_val ||
+			    rsp_sts.s.dma_pend) {
+				pr_err("%s(%s): Error: single-DMA timed out after %lums.  rsp_sts: 0x%llx, emm_int: 0x%llx, emm_dma_int: 0x%llx, rsp_sts_lo: 0x%llx, emm_dma: 0x%llx\n",
+				       __func__, mmc->dev->name, timeout,
+				       rsp_sts.u,
+				       read_csr(mmc, MIO_EMM_RSP_STS()),
+				       read_csr(mmc, MIO_EMM_DMA_INT()),
+				       read_csr(mmc, MIO_EMM_RSP_LO()),
+				       read_csr(mmc, MIO_EMM_DMA()));
+				return blkcnt - count;
+			}
+			WATCHDOG_RESET();
+		} while (--count);
+	}
+
+	return blkcnt;
+}
+
+/**
+ * Send a command to the eMMC/SD device
+ *
+ * @param mmc	mmc device
+ * @param cmd	cmd to send and response
+ * @param data	additional data
+ * @param flags
+ * @return	0 for success, otherwise error
+ */
+static int octeontx_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
+				 struct mmc_data *data)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	const char *name = slot->dev->name;
+	struct octeontx_mmc_cr_mods mods = {0, 0};
+	union mio_emm_rsp_sts rsp_sts;
+	union mio_emm_cmd emm_cmd;
+	union mio_emm_rsp_lo rsp_lo;
+	union mio_emm_buf_idx emm_buf_idx;
+	union mio_emm_buf_dat emm_buf_dat;
+	ulong start;
+	int i;
+	ulong blkcnt;
+
+	/**
+	 * This constant has a 1 bit for each command which should have a short
+	 * timeout and a 0 for each bit with a long timeout.  Currently the
+	 * following commands have a long timeout:
+	 *   CMD6, CMD17, CMD18, CMD24, CMD25, CMD32, CMD33, CMD35, CMD36 and
+	 *   CMD38.
+	 */
+	static const u64 timeout_short = 0xFFFFFFA4FCF9FFDFull;
+	uint timeout;
+
+	if (cmd->cmdidx == MMC_CMD_SEND_EXT_CSD) {
+		union mio_emm_rca emm_rca;
+
+		emm_rca.u = 0;
+		emm_rca.s.card_rca = mmc->rca;
+		write_csr(mmc, MIO_EMM_RCA(), emm_rca.u);
+	}
+
+	if (timeout_short & (1ull << cmd->cmdidx))
+		timeout = MMC_TIMEOUT_SHORT;
+	else if (cmd->cmdidx == MMC_CMD_SWITCH && IS_SD(mmc))
+		timeout = 2560;
+	else if (cmd->cmdidx == MMC_CMD_ERASE)
+		timeout = MMC_TIMEOUT_ERASE;
+	else
+		timeout = MMC_TIMEOUT_LONG;
+
+	debug("%s(%s): cmd idx: %u, arg: 0x%x, resp type: 0x%x, timeout: %u\n",
+	      __func__, name, cmd->cmdidx, cmd->cmdarg, cmd->resp_type,
+	      timeout);
+	if (data)
+		debug("  data: addr: %p, flags: 0x%x, blocks: %u, blocksize: %u\n",
+		      data->dest, data->flags, data->blocks, data->blocksize);
+
+	octeontx_mmc_switch_to(mmc);
+
+	/* Clear any interrupts */
+	write_csr(mmc, MIO_EMM_INT(), read_csr(mmc, MIO_EMM_INT()));
+
+	/*
+	 * We need to override the default command types and response types
+	 * when dealing with SD cards.
+	 */
+	mods = octeontx_mmc_get_cr_mods(mmc, cmd, data);
+
+	/* Handle block read/write/stop operations */
+	switch (cmd->cmdidx) {
+	case MMC_CMD_GO_IDLE_STATE:
+		slot->tuned = false;
+		slot->hs200_tuned = false;
+		slot->hs400_tuned = false;
+		break;
+	case MMC_CMD_STOP_TRANSMISSION:
+		return 0;
+	case MMC_CMD_READ_MULTIPLE_BLOCK:
+	case MMC_CMD_READ_SINGLE_BLOCK:
+		pr_debug("%s(%s): Reading blocks\n", __func__, name);
+		blkcnt = octeontx_mmc_read_blocks(mmc, cmd, data, true);
+		return (blkcnt > 0) ? 0 : -1;
+	case MMC_CMD_WRITE_MULTIPLE_BLOCK:
+	case MMC_CMD_WRITE_SINGLE_BLOCK:
+		blkcnt = octeontx_mmc_write_blocks(mmc, cmd, data);
+		return (blkcnt > 0) ? 0 : -1;
+	case MMC_CMD_SELECT_CARD:
+		/* Set the RCA register (is it set automatically?) */
+		if (IS_SD(mmc)) {
+			union mio_emm_rca emm_rca;
+
+			emm_rca.u = 0;
+			emm_rca.s.card_rca = (cmd->cmdarg >> 16);
+			write_csr(mmc, MIO_EMM_RCA(), emm_rca.u);
+			debug("%s: Set SD relative address (RCA) to 0x%x\n",
+			      __func__, emm_rca.s.card_rca);
+		}
+		break;
+
+	case MMC_CMD_SWITCH:
+		if (!data && !slot->is_acmd)
+			octeontx_mmc_track_switch(mmc, cmd->cmdarg);
+		break;
+	}
+
+	emm_cmd.u = 0;
+	emm_cmd.s.cmd_val = 1;
+	emm_cmd.s.bus_id = slot->bus_id;
+	emm_cmd.s.cmd_idx = cmd->cmdidx;
+	emm_cmd.s.arg = cmd->cmdarg;
+	emm_cmd.s.ctype_xor = mods.ctype_xor;
+	emm_cmd.s.rtype_xor = mods.rtype_xor;
+	if (data && data->blocks == 1 && data->blocksize != 512) {
+		emm_cmd.s.offset =
+			64 - ((data->blocks * data->blocksize) / 8);
+		debug("%s: offset set to %u\n", __func__, emm_cmd.s.offset);
+	}
+
+	if (data && data->flags & MMC_DATA_WRITE) {
+		u8 *src = (u8 *)data->src;
+
+		if (!src) {
+			pr_err("%s(%s): Error: data source for cmd 0x%x is NULL!\n",
+			       __func__, name, cmd->cmdidx);
+			return -1;
+		}
+		if (data->blocksize > 512) {
+			pr_err("%s(%s): Error: data for cmd 0x%x exceeds 512 bytes\n",
+			       __func__, name, cmd->cmdidx);
+			return -1;
+		}
+#ifdef DEBUG
+		debug("%s: Sending %d bytes data\n", __func__, data->blocksize);
+		print_buffer(0, src, 1, data->blocksize, 0);
+#endif
+		emm_buf_idx.u = 0;
+		emm_buf_idx.s.inc = 1;
+		write_csr(mmc, MIO_EMM_BUF_IDX(), emm_buf_idx.u);
+		for (i = 0; i < (data->blocksize + 7) / 8; i++) {
+			memcpy(&emm_buf_dat.u, src, sizeof(emm_buf_dat.u));
+			write_csr(mmc, MIO_EMM_BUF_DAT(),
+				  cpu_to_be64(emm_buf_dat.u));
+			src += sizeof(emm_buf_dat.u);
+		}
+		write_csr(mmc, MIO_EMM_BUF_IDX(), 0);
+	}
+	debug("%s(%s): Sending command %u (emm_cmd: 0x%llx)\n", __func__,
+	      name, cmd->cmdidx, emm_cmd.u);
+	set_wdog(mmc, timeout * 1000);
+	write_csr(mmc, MIO_EMM_CMD(), emm_cmd.u);
+
+	/* Wait for command to finish or time out */
+	start = get_timer(0);
+	do {
+		rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+		WATCHDOG_RESET();
+	} while (!rsp_sts.s.cmd_done && !rsp_sts.s.rsp_timeout &&
+		 (get_timer(start) < timeout + 10));
+	octeontx_mmc_print_rsp_errors(mmc, rsp_sts);
+	if (rsp_sts.s.rsp_timeout || !rsp_sts.s.cmd_done) {
+		debug("%s(%s): Error: command %u(0x%x) timed out.  rsp_sts: 0x%llx\n",
+		      __func__, name, cmd->cmdidx, cmd->cmdarg, rsp_sts.u);
+		octeontx_mmc_print_registers(mmc);
+		return -ETIMEDOUT;
+	}
+	if (rsp_sts.s.rsp_crc_err) {
+		debug("%s(%s): RSP CRC error, rsp_sts: 0x%llx, cmdidx: %u, arg: 0x%08x\n",
+		      __func__, name, rsp_sts.u, cmd->cmdidx, cmd->cmdarg);
+		octeontx_mmc_print_registers(mmc);
+		return -1;
+	}
+	if (slot->bus_id != rsp_sts.s.bus_id) {
+		pr_warn("%s(%s): bus id mismatch, got %d, expected %d for command 0x%x(0x%x)\n",
+			__func__, name,
+			rsp_sts.s.bus_id, slot->bus_id,
+			cmd->cmdidx, cmd->cmdarg);
+		goto error;
+	}
+	if (rsp_sts.s.rsp_bad_sts) {
+		rsp_lo.u = read_csr(mmc, MIO_EMM_RSP_LO());
+		debug("%s: Bad response for bus id %d, cmd id %d:\n"
+		      "    rsp_timeout: %d\n"
+		      "    rsp_bad_sts: %d\n"
+		      "    rsp_crc_err: %d\n",
+		      __func__, slot->bus_id, cmd->cmdidx,
+		      rsp_sts.s.rsp_timeout,
+		      rsp_sts.s.rsp_bad_sts,
+		      rsp_sts.s.rsp_crc_err);
+		if (rsp_sts.s.rsp_type == 1 && rsp_sts.s.rsp_bad_sts) {
+			debug("    Response status: 0x%llx\n",
+			      (rsp_lo.u >> 8) & 0xffffffff);
+#ifdef DEBUG
+			mmc_print_status((rsp_lo.u >> 8) & 0xffffffff);
+#endif
+		}
+		goto error;
+	}
+	if (rsp_sts.s.cmd_idx != cmd->cmdidx) {
+		debug("%s(%s): Command response index %d does not match command index %d\n",
+		      __func__, name, rsp_sts.s.cmd_idx, cmd->cmdidx);
+		octeontx_print_rsp_sts(mmc);
+		debug("%s: rsp_lo: 0x%llx\n", __func__,
+		      read_csr(mmc, MIO_EMM_RSP_LO()));
+
+		goto error;
+	}
+
+	slot->is_acmd = (cmd->cmdidx == MMC_CMD_APP_CMD);
+
+	if (!cmd->resp_type & MMC_RSP_PRESENT)
+		debug("  Response type: 0x%x, no response expected\n",
+		      cmd->resp_type);
+	/* Get the response if present */
+	if (rsp_sts.s.rsp_val && (cmd->resp_type & MMC_RSP_PRESENT)) {
+		union mio_emm_rsp_hi rsp_hi;
+
+		rsp_lo.u = read_csr(mmc, MIO_EMM_RSP_LO());
+
+		switch (rsp_sts.s.rsp_type) {
+		case 1:
+		case 3:
+		case 4:
+		case 5:
+			cmd->response[0] = (rsp_lo.u >> 8) & 0xffffffffull;
+			debug("  response: 0x%08x\n",
+			      cmd->response[0]);
+			cmd->response[1] = 0;
+			cmd->response[2] = 0;
+			cmd->response[3] = 0;
+			break;
+		case 2:
+			cmd->response[3] = rsp_lo.u & 0xffffffff;
+			cmd->response[2] = (rsp_lo.u >> 32) & 0xffffffff;
+			rsp_hi.u = read_csr(mmc, MIO_EMM_RSP_HI());
+			cmd->response[1] = rsp_hi.u & 0xffffffff;
+			cmd->response[0] = (rsp_hi.u >> 32) & 0xffffffff;
+			debug("  response: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+			      cmd->response[0], cmd->response[1],
+			      cmd->response[2], cmd->response[3]);
+			break;
+		default:
+			pr_err("%s(%s): Unknown response type 0x%x for command %d, arg: 0x%x, rsp_sts: 0x%llx\n",
+			       __func__, name, rsp_sts.s.rsp_type, cmd->cmdidx,
+			       cmd->cmdarg, rsp_sts.u);
+			return -1;
+		}
+	} else {
+		debug("  Response not expected\n");
+	}
+
+	if (data && data->flags & MMC_DATA_READ) {
+		u8 *dest = (u8 *)data->dest;
+
+		if (!dest) {
+			pr_err("%s(%s): Error, destination buffer NULL!\n",
+			       __func__, mmc->dev->name);
+			goto error;
+		}
+		if (data->blocksize > 512) {
+			printf("%s(%s): Error: data size %u exceeds 512\n",
+			       __func__, mmc->dev->name,
+			       data->blocksize);
+			goto error;
+		}
+		emm_buf_idx.u = 0;
+		emm_buf_idx.s.inc = 1;
+		write_csr(mmc, MIO_EMM_BUF_IDX(), emm_buf_idx.u);
+		for (i = 0; i < (data->blocksize + 7) / 8; i++) {
+			emm_buf_dat.u = read_csr(mmc, MIO_EMM_BUF_DAT());
+			emm_buf_dat.u = be64_to_cpu(emm_buf_dat.u);
+			memcpy(dest, &emm_buf_dat.u, sizeof(emm_buf_dat.u));
+			dest += sizeof(emm_buf_dat.u);
+		}
+		write_csr(mmc, MIO_EMM_BUF_IDX(), 0);
+#ifdef DEBUG
+		debug("%s: Received %d bytes data\n", __func__,
+		      data->blocksize);
+		print_buffer(0, data->dest, 1, data->blocksize, 0);
+#endif
+	}
+
+	return 0;
+error:
+#ifdef DEBUG
+	octeontx_mmc_print_registers(mmc);
+#endif
+	return -1;
+}
+
+static int octeontx_mmc_dev_send_cmd(struct udevice *dev, struct mmc_cmd *cmd,
+				     struct mmc_data *data)
+{
+	return octeontx_mmc_send_cmd(dev_to_mmc(dev), cmd, data);
+}
+
+#ifdef MMC_SUPPORTS_TUNING
+static int octeontx_mmc_test_cmd(struct mmc *mmc, u32 opcode, int *statp)
+{
+	struct mmc_cmd cmd;
+	int err;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	debug("%s(%s, %u, %p)\n", __func__, mmc->dev->name, opcode, statp);
+	cmd.cmdidx = opcode;
+	cmd.resp_type = MMC_RSP_R1;
+	cmd.cmdarg = mmc->rca << 16;
+
+	err = octeontx_mmc_send_cmd(mmc, &cmd, NULL);
+	if (err)
+		debug("%s(%s, %u) returned %d\n", __func__,
+		      mmc->dev->name, opcode, err);
+	if (statp)
+		*statp = cmd.response[0];
+	return err;
+}
+
+static int octeontx_mmc_test_get_ext_csd(struct mmc *mmc, u32 opcode,
+					 int *statp)
+{
+	struct mmc_cmd cmd;
+	struct mmc_data data;
+	int err;
+	u8 ext_csd[MMC_MAX_BLOCK_LEN];
+
+	debug("%s(%s, %u, %p)\n",  __func__, mmc->dev->name, opcode, statp);
+	memset(&cmd, 0, sizeof(cmd));
+
+	cmd.cmdidx = MMC_CMD_SEND_EXT_CSD;
+	cmd.resp_type = MMC_RSP_R1;
+	cmd.cmdarg = 0;
+
+	data.dest = (char *)ext_csd;
+	data.blocks = 1;
+	data.blocksize = MMC_MAX_BLOCK_LEN;
+	data.flags = MMC_DATA_READ;
+
+	err = octeontx_mmc_send_cmd(mmc, &cmd, &data);
+	if (statp)
+		*statp = cmd.response[0];
+
+	return err;
+}
+
+/**
+ * Wrapper to set the MIO_EMM_TIMING register
+ *
+ * @param	mmc		pointer to mmc data structure
+ * @param	emm_timing	New emm_timing register value
+ *
+ * On some devices it is possible that changing the data out value can
+ * cause a glitch on an internal fifo.  This works around this problem
+ * by performing a soft-reset immediately before setting the timing register.
+ *
+ * Note: this function should not be called from any function that
+ * performs DMA or block operations since not all registers are
+ * preserved.
+ */
+static void octeontx_mmc_set_emm_timing(struct mmc *mmc,
+					union mio_emm_timing emm_timing)
+{
+	union mio_emm_cfg emm_cfg;
+	struct octeontx_mmc_slot *slot = mmc->priv;
+	union mio_emm_debug emm_debug;
+
+	debug("%s(%s, 0x%llx) din: %u\n", __func__, mmc->dev->name,
+	      emm_timing.u, emm_timing.s.data_in_tap);
+
+	udelay(1);
+	if (slot->host->tap_requires_noclk) {
+		/* Turn off the clock */
+		emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+		emm_debug.s.emmc_clk_disable = 1;
+		write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+		udelay(1);
+		emm_debug.s.rdsync_rst = 1;
+		write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+	}
+	emm_cfg.u = read_csr(mmc, MIO_EMM_CFG());
+	emm_cfg.s.bus_ena = 1 << 3;
+	write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+
+	udelay(1);
+	write_csr(mmc, MIO_EMM_TIMING(), emm_timing.u);
+	udelay(1);
+
+	if (slot->host->tap_requires_noclk) {
+		/* Turn on the clock */
+		emm_debug.s.rdsync_rst = 0;
+		write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+		udelay(1);
+		emm_debug.s.emmc_clk_disable = 0;
+		write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+		udelay(1);
+	}
+	emm_cfg.s.bus_ena = 1 << mmc_to_slot(mmc)->bus_id;
+	write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+}
+
+static const u8 octeontx_hs400_tuning_block[512] = {
+	0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+	0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+	0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+	0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+	0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+	0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+	0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+	0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+	0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+	0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+	0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+	0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+	0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+	0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+	0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+	0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x00,
+	0x00, 0xff, 0x00, 0xff, 0x55, 0xaa, 0x55, 0xaa,
+	0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+	0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+	0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+	0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+	0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+	0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+	0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
+	0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
+	0x01, 0xfe, 0x01, 0xfe, 0xcc, 0xcc, 0xcc, 0xff,
+	0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+	0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+	0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+	0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+
+};
+
+/**
+ * Perform tuning in HS400 mode
+ *
+ * @param[in]	mmc	mmc data structure
+ *
+ * @ret		0 for success, otherwise error
+ */
+static int octeontx_tune_hs400(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct mmc_cmd cmd;
+	struct mmc_data data;
+	union mio_emm_timing emm_timing;
+	u8 buffer[mmc->read_bl_len];
+	int tap_adj;
+	int err = -1;
+	int tap;
+	int run = 0;
+	int start_run = -1;
+	int best_run = 0;
+	int best_start = -1;
+	bool prev_ok = false;
+	char env_name[64];
+	char how[MAX_NO_OF_TAPS + 1] = "";
+
+	if (slot->hs400_tuning_block == -1)
+		return 0;
+
+	/* The eMMC standard disables all tuning support when operating in
+	 * DDR modes like HS400.  The problem with this is that there are
+	 * many cases where the HS200 tuning does not work for HS400 mode.
+	 * In order to perform this tuning, while in HS200 a block is written
+	 * to a block specified in the device tree (marvell,hs400-tuning-block)
+	 * which is used for tuning in this function by repeatedly reading
+	 * this block and comparing the data and return code.  This function
+	 * chooses the data input tap in the middle of the longest run of
+	 * successful read operations.
+	 */
+
+	emm_timing = slot->hs200_taps;
+	debug("%s(%s): Start ci: %d, co: %d, di: %d, do: %d\n",
+	      __func__, mmc->dev->name, emm_timing.s.cmd_in_tap,
+	      emm_timing.s.cmd_out_tap, emm_timing.s.data_in_tap,
+	      emm_timing.s.data_out_tap);
+	memset(buffer, 0xdb, sizeof(buffer));
+
+	snprintf(env_name, sizeof(env_name), "emmc%d_data_in_tap_hs400",
+		 slot->bus_id);
+	tap = env_get_ulong(env_name, 10, -1L);
+	if (tap >= 0 && tap < MAX_NO_OF_TAPS) {
+		printf("Overriding data input tap for HS400 mode to %d\n", tap);
+		emm_timing.s.data_in_tap = tap;
+		octeontx_mmc_set_emm_timing(mmc, emm_timing);
+		return 0;
+	}
+
+	for (tap = 0; tap <= MAX_NO_OF_TAPS; tap++, prev_ok = !err) {
+		if (tap < MAX_NO_OF_TAPS) {
+			debug("%s: Testing data in tap %d\n", __func__, tap);
+			emm_timing.s.data_in_tap = tap;
+			octeontx_mmc_set_emm_timing(mmc, emm_timing);
+
+			cmd.cmdidx = MMC_CMD_READ_SINGLE_BLOCK;
+			cmd.cmdarg = slot->hs400_tuning_block;
+			cmd.resp_type = MMC_RSP_R1;
+			data.dest = (void *)buffer;
+			data.blocks = 1;
+			data.blocksize = mmc->read_bl_len;
+			data.flags = MMC_DATA_READ;
+			err = !octeontx_mmc_read_blocks(mmc, &cmd, &data,
+							false);
+			if (err || memcmp(buffer, octeontx_hs400_tuning_block,
+					  sizeof(buffer))) {
+#ifdef DEBUG
+				if (!err) {
+					debug("%s: data mismatch.  Read:\n",
+					      __func__);
+					print_buffer(0, buffer, 1,
+						     sizeof(buffer), 0);
+					debug("\nExpected:\n");
+					print_buffer(0,
+					    octeontx_hs400_tuning_block, 1,
+					    sizeof(octeontx_hs400_tuning_block),
+					    0);
+				} else {
+					debug("%s: Error %d reading block\n",
+					      __func__, err);
+				}
+#endif
+				err = -EINVAL;
+			} else {
+				debug("%s: tap %d good\n", __func__, tap);
+			}
+			how[tap] = "-+"[!err];
+		} else {
+			err = -EINVAL;
+		}
+
+		if (!err) {
+			if (!prev_ok)
+				start_run = tap;
+		} else if (prev_ok) {
+			run = tap - 1 - start_run;
+			if (start_run >= 0 && run > best_run) {
+				best_start = start_run;
+				best_run = run;
+			}
+		}
+	}
+
+	how[tap - 1] = '\0';
+	if (best_start < 0) {
+		printf("%s(%s): %lldMHz tuning failed for HS400\n",
+		       __func__, mmc->dev->name, slot->clock / 1000000);
+		return -EINVAL;
+	}
+	tap = best_start + best_run / 2;
+
+	snprintf(env_name, sizeof(env_name), "emmc%d_data_in_tap_adj_hs400",
+		 slot->bus_id);
+	tap_adj = env_get_ulong(env_name, 10, slot->hs400_tap_adj);
+	/*
+	 * Keep it in range and if out of range force it back in with a small
+	 * buffer.
+	 */
+	if (best_run > 3) {
+		tap = tap + tap_adj;
+		if (tap >= best_start + best_run)
+			tap = best_start + best_run - 2;
+		if (tap <= best_start)
+			tap = best_start + 2;
+	}
+	how[tap] = '@';
+	debug("Tuning: %s\n", how);
+	debug("%s(%s): HS400 tap: best run start: %d, length: %d, tap: %d\n",
+	      __func__, mmc->dev->name, best_start, best_run, tap);
+	slot->hs400_taps = slot->hs200_taps;
+	slot->hs400_taps.s.data_in_tap = tap;
+	slot->hs400_tuned = true;
+	if (env_get_yesno("emmc_export_hs400_taps") > 0) {
+		debug("%s(%s): Exporting HS400 taps\n",
+		      __func__, mmc->dev->name);
+		env_set_ulong("emmc_timing_tap", slot->host->timing_taps);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_data_in_tap_debug",
+			 slot->bus_id);
+		env_set(env_name, how);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_data_in_tap_val",
+			 slot->bus_id);
+		env_set_ulong(env_name, tap);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_data_in_tap_start",
+			 slot->bus_id);
+		env_set_ulong(env_name, best_start);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_data_in_tap_end",
+			 slot->bus_id);
+		env_set_ulong(env_name, best_start + best_run);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_cmd_in_tap",
+			 slot->bus_id);
+		env_set_ulong(env_name, slot->hs400_taps.s.cmd_in_tap);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_cmd_out_tap",
+			 slot->bus_id);
+		env_set_ulong(env_name, slot->hs400_taps.s.cmd_out_tap);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_cmd_out_delay",
+			 slot->bus_id);
+		env_set_ulong(env_name, slot->cmd_out_hs400_delay);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_data_out_tap",
+			 slot->bus_id);
+		env_set_ulong(env_name, slot->hs400_taps.s.data_out_tap);
+		snprintf(env_name, sizeof(env_name),
+			 "emmc%d_hs400_data_out_delay",
+			 slot->bus_id);
+		env_set_ulong(env_name, slot->data_out_hs400_delay);
+	} else {
+		debug("%s(%s): HS400 environment export disabled\n",
+		      __func__, mmc->dev->name);
+	}
+	octeontx_mmc_set_timing(mmc);
+
+	return 0;
+}
+
+struct adj {
+	const char *name;
+	u8 mask_shift;
+	int (*test)(struct mmc *mmc, u32 opcode, int *error);
+	u32 opcode;
+	bool ddr_only;
+	bool hs200_only;
+	bool not_hs200_only;
+	u8 num_runs;
+};
+
+struct adj adj[] = {
+	{ "CMD_IN", 48, octeontx_mmc_test_cmd, MMC_CMD_SEND_STATUS,
+	  false, false, false, 2, },
+/*	{ "CMD_OUT", 32, octeontx_mmc_test_cmd, MMC_CMD_SEND_STATUS, },*/
+	{ "DATA_IN(HS200)", 16, mmc_send_tuning,
+		MMC_CMD_SEND_TUNING_BLOCK_HS200, false, true, false, 2, },
+	{ "DATA_IN", 16, octeontx_mmc_test_get_ext_csd, 0, false, false,
+	  true, 2, },
+/*	{ "DATA_OUT", 0, octeontx_mmc_test_cmd, 0, true, false},*/
+	{ NULL, },
+};
+
+/**
+ * Perform tuning tests to find optimal timing
+ *
+ * @param	mmc	mmc device
+ * @param	adj	parameter to tune
+ * @param	opcode	command opcode to use
+ *
+ * @return	0 for success, -1 if tuning failed
+ */
+static int octeontx_mmc_adjust_tuning(struct mmc *mmc, struct adj *adj,
+				      u32 opcode)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	union mio_emm_timing timing;
+	union mio_emm_debug emm_debug;
+	int tap;
+	int err = -1;
+	int run = 0;
+	int count;
+	int start_run = -1;
+	int best_run = 0;
+	int best_start = -1;
+	bool prev_ok = false;
+	u64 tap_status = 0;
+	const int tap_adj = slot->hs200_tap_adj;
+	char how[MAX_NO_OF_TAPS + 1] = "";
+	bool is_hs200 = mmc->selected_mode == MMC_HS_200;
+
+	debug("%s(%s, %s, %d), hs200: %d\n", __func__, mmc->dev->name,
+	      adj->name, opcode, is_hs200);
+	octeontx_mmc_set_emm_timing(mmc,
+				    is_hs200 ? slot->hs200_taps : slot->taps);
+
+#ifdef DEBUG
+	if (opcode == MMC_CMD_SEND_TUNING_BLOCK_HS200) {
+		printf("%s(%s): Before tuning %s, opcode: %d\n",
+		       __func__, mmc->dev->name, adj->name, opcode);
+		octeontx_mmc_print_registers2(mmc, NULL);
+	}
+#endif
+
+	/*
+	 * The algorithm to find the optimal timing is to start
+	 * at the end and work backwards and select the second
+	 * value that passes.  Each test is repeated twice.
+	 */
+	for (tap = 0; tap <= MAX_NO_OF_TAPS; tap++, prev_ok = !err) {
+		if (tap < MAX_NO_OF_TAPS) {
+			if (slot->host->tap_requires_noclk) {
+				/* Turn off the clock */
+				emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+				emm_debug.s.emmc_clk_disable = 1;
+				write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+				udelay(1);
+				emm_debug.s.rdsync_rst = 1;
+				write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+				udelay(1);
+			}
+
+			timing.u = read_csr(mmc, MIO_EMM_TIMING());
+			timing.u &= ~(0x3full << adj->mask_shift);
+			timing.u |= (u64)tap << adj->mask_shift;
+			write_csr(mmc, MIO_EMM_TIMING(), timing.u);
+			debug("%s(%s): Testing ci: %d, co: %d, di: %d, do: %d\n",
+			      __func__, mmc->dev->name, timing.s.cmd_in_tap,
+			      timing.s.cmd_out_tap, timing.s.data_in_tap,
+			      timing.s.data_out_tap);
+
+			if (slot->host->tap_requires_noclk) {
+				/* Turn off the clock */
+				emm_debug.s.rdsync_rst = 0;
+				write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+				udelay(1);
+				emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+				emm_debug.s.emmc_clk_disable = 0;
+				write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+				udelay(1);
+			}
+			for (count = 0; count < 2; count++) {
+				err = adj->test(mmc, opcode, NULL);
+				if (err) {
+					debug("%s(%s, %s): tap %d failed, count: %d, rsp_sts: 0x%llx, rsp_lo: 0x%llx\n",
+					      __func__, mmc->dev->name,
+					      adj->name, tap, count,
+					      read_csr(mmc,
+						       MIO_EMM_RSP_STS()),
+					      read_csr(mmc,
+						       MIO_EMM_RSP_LO()));
+					debug("%s(%s, %s): tap: %d, do: %d, di: %d, co: %d, ci: %d\n",
+					      __func__, mmc->dev->name,
+					      adj->name, tap,
+					      timing.s.data_out_tap,
+					      timing.s.data_in_tap,
+					      timing.s.cmd_out_tap,
+					      timing.s.cmd_in_tap);
+					break;
+				}
+				debug("%s(%s, %s): tap %d passed, count: %d, rsp_sts: 0x%llx, rsp_lo: 0x%llx\n",
+				      __func__, mmc->dev->name, adj->name, tap,
+				      count,
+				      read_csr(mmc, MIO_EMM_RSP_STS()),
+				      read_csr(mmc, MIO_EMM_RSP_LO()));
+			}
+			tap_status |= (u64)(!err) << tap;
+			how[tap] = "-+"[!err];
+		} else {
+			/*
+			 * Putting the end+1 case in the loop simplifies
+			 * logic, allowing 'prev_ok' to process a sweet
+			 * spot in tuning which extends to the wall.
+			 */
+			err = -EINVAL;
+		}
+		if (!err) {
+			/*
+			 * If no CRC/etc errors in the response, but previous
+			 * failed, note the start of a new run.
+			 */
+			debug("  prev_ok: %d\n", prev_ok);
+			if (!prev_ok)
+				start_run = tap;
+		} else if (prev_ok) {
+			run = tap - 1 - start_run;
+			/* did we just exit a wider sweet spot? */
+			if (start_run >= 0 && run > best_run) {
+				best_start = start_run;
+				best_run = run;
+			}
+		}
+	}
+	how[tap - 1] = '\0';
+	if (best_start < 0) {
+		printf("%s(%s, %s): %lldMHz tuning %s failed\n", __func__,
+		       mmc->dev->name, adj->name, slot->clock / 1000000,
+		       adj->name);
+		return -EINVAL;
+	}
+
+	tap = best_start + best_run / 2;
+	debug("  tap %d is center, start: %d, run: %d\n", tap,
+	      best_start, best_run);
+	if (is_hs200) {
+		slot->hs200_taps.u &= ~(0x3full << adj->mask_shift);
+		slot->hs200_taps.u |= (u64)tap << adj->mask_shift;
+	} else {
+		slot->taps.u &= ~(0x3full << adj->mask_shift);
+		slot->taps.u |= (u64)tap << adj->mask_shift;
+	}
+	if (best_start < 0) {
+		printf("%s(%s, %s): %lldMHz tuning %s failed\n", __func__,
+		       mmc->dev->name, adj->name, slot->clock / 1000000,
+		       adj->name);
+		return -EINVAL;
+	}
+
+	tap = best_start + best_run / 2;
+	if (is_hs200 && (tap + tap_adj >= 0) && (tap + tap_adj < 64) &&
+	    tap_status & (1ULL << (tap + tap_adj))) {
+		debug("Adjusting tap from %d by %d to %d\n",
+		      tap, tap_adj, tap + tap_adj);
+		tap += tap_adj;
+	}
+	how[tap] = '@';
+	debug("%s/%s %d/%d/%d %s\n", mmc->dev->name,
+	      adj->name, best_start, tap, best_start + best_run, how);
+
+	if (is_hs200) {
+		slot->hs200_taps.u &= ~(0x3full << adj->mask_shift);
+		slot->hs200_taps.u |= (u64)tap << adj->mask_shift;
+	} else {
+		slot->taps.u &= ~(0x3full << adj->mask_shift);
+		slot->taps.u |= (u64)tap << adj->mask_shift;
+	}
+
+#ifdef DEBUG
+	if (opcode == MMC_CMD_SEND_TUNING_BLOCK_HS200) {
+		debug("%s(%s, %s): After successful tuning\n",
+		      __func__, mmc->dev->name, adj->name);
+		debug("%s(%s, %s): tap: %d, new do: %d, di: %d, co: %d, ci: %d\n",
+		      __func__, mmc->dev->name, adj->name, tap,
+		      slot->taps.s.data_out_tap,
+		      slot->taps.s.data_in_tap,
+		      slot->taps.s.cmd_out_tap,
+		      slot->taps.s.cmd_in_tap);
+		debug("%s(%s, %s): tap: %d, new do HS200: %d, di: %d, co: %d, ci: %d\n",
+		      __func__, mmc->dev->name, adj->name, tap,
+		      slot->hs200_taps.s.data_out_tap,
+		      slot->hs200_taps.s.data_in_tap,
+		      slot->hs200_taps.s.cmd_out_tap,
+		      slot->hs200_taps.s.cmd_in_tap);
+	}
+#endif
+	octeontx_mmc_set_timing(mmc);
+
+	if (is_hs200 && env_get_yesno("emmc_export_hs200_taps")) {
+		char env_name[64];
+
+		env_set_ulong("emmc_timing_tap", slot->host->timing_taps);
+		switch (opcode) {
+		case MMC_CMD_SEND_TUNING_BLOCK:
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_data_in_tap_debug",
+				 slot->bus_id);
+			env_set(env_name, how);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_data_in_tap_val", slot->bus_id);
+			env_set_ulong(env_name, tap);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_data_in_tap_start",
+				 slot->bus_id);
+			env_set_ulong(env_name, best_start);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_data_in_tap_end",
+				 slot->bus_id);
+			env_set_ulong(env_name, best_start + best_run);
+			break;
+		case MMC_CMD_SEND_STATUS:
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_cmd_in_tap_debug",
+				 slot->bus_id);
+			env_set(env_name, how);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_cmd_in_tap_val", slot->bus_id);
+			env_set_ulong(env_name, tap);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_cmd_in_tap_start",
+				 slot->bus_id);
+			env_set_ulong(env_name, best_start);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_cmd_in_tap_end",
+				 slot->bus_id);
+			env_set_ulong(env_name, best_start + best_run);
+			break;
+		default:
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_data_out_tap", slot->bus_id);
+			env_set_ulong(env_name, slot->data_out_hs200_delay);
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_hs200_cmd_out_tap", slot->bus_id);
+			env_set_ulong(env_name, slot->cmd_out_hs200_delay);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int octeontx_mmc_execute_tuning(struct udevice *dev, u32 opcode)
+{
+	struct mmc *mmc = dev_to_mmc(dev);
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	union mio_emm_timing emm_timing;
+	int err;
+	struct adj *a;
+	bool is_hs200;
+	char env_name[64];
+
+	pr_info("%s re-tuning, opcode 0x%x\n", dev->name, opcode);
+
+	if (slot->is_asim || slot->is_emul)
+		return 0;
+
+	is_hs200 = (mmc->selected_mode == MMC_HS_200);
+	if (is_hs200) {
+		slot->hs200_tuned = false;
+		slot->hs400_tuned = false;
+	} else {
+		slot->tuned = false;
+	}
+	octeontx_mmc_set_output_bus_timing(mmc);
+	octeontx_mmc_set_input_bus_timing(mmc);
+	emm_timing.u = read_csr(mmc, MIO_EMM_TIMING());
+	if (mmc->selected_mode == MMC_HS_200) {
+		slot->hs200_taps.s.cmd_out_tap = emm_timing.s.cmd_out_tap;
+		slot->hs200_taps.s.data_out_tap = emm_timing.s.data_out_tap;
+	} else {
+		slot->taps.s.cmd_out_tap = emm_timing.s.cmd_out_tap;
+		slot->taps.s.data_out_tap = emm_timing.s.data_out_tap;
+	}
+	octeontx_mmc_set_input_bus_timing(mmc);
+	octeontx_mmc_set_output_bus_timing(mmc);
+
+	for (a = adj; a->name; a++) {
+		ulong in_tap;
+
+		if (!strcmp(a->name, "CMD_IN")) {
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_cmd_in_tap", slot->bus_id);
+			in_tap = env_get_ulong(env_name, 10, (ulong)-1);
+			if (in_tap != (ulong)-1) {
+				if (mmc->selected_mode == MMC_HS_200 ||
+				    a->hs200_only) {
+					slot->hs200_taps.s.cmd_in_tap = in_tap;
+					slot->hs400_taps.s.cmd_in_tap = in_tap;
+				} else {
+					slot->taps.s.cmd_in_tap = in_tap;
+				}
+				continue;
+			}
+		} else if (a->hs200_only &&
+			   !strcmp(a->name, "DATA_IN(HS200)")) {
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_data_in_tap_hs200", slot->bus_id);
+			in_tap = env_get_ulong(env_name, 10, (ulong)-1);
+			if (in_tap != (ulong)-1) {
+				debug("%s(%s): Overriding HS200 data in tap to %d\n",
+				      __func__, dev->name, (int)in_tap);
+				slot->hs200_taps.s.data_in_tap = in_tap;
+				continue;
+			}
+		} else if (!a->hs200_only && !strcmp(a->name, "DATA_IN")) {
+			snprintf(env_name, sizeof(env_name),
+				 "emmc%d_data_in_tap", slot->bus_id);
+			in_tap = env_get_ulong(env_name, 10, (ulong)-1);
+			if (in_tap != (ulong)-1) {
+				debug("%s(%s): Overriding non-HS200 data in tap to %d\n",
+				      __func__, dev->name, (int)in_tap);
+				slot->taps.s.data_in_tap = in_tap;
+				continue;
+			}
+		}
+
+		debug("%s(%s): Testing: %s, mode: %s, opcode: %u\n", __func__,
+		      dev->name, a->name, mmc_mode_name(mmc->selected_mode),
+		      opcode);
+
+		/* Skip DDR only test when not in DDR mode */
+		if (a->ddr_only && !mmc->ddr_mode) {
+			debug("%s(%s): Skipping %s due to non-DDR mode\n",
+			      __func__, dev->name, a->name);
+			continue;
+		}
+		/* Skip hs200 tests in non-hs200 mode and
+		 * non-hs200 tests in hs200 mode
+		 */
+		if (is_hs200) {
+			if (a->not_hs200_only) {
+				debug("%s(%s): Skipping %s\n", __func__,
+				      dev->name, a->name);
+				continue;
+			}
+		} else {
+			if (a->hs200_only) {
+				debug("%s(%s): Skipping %s\n", __func__,
+				      dev->name, a->name);
+				continue;
+			}
+		}
+
+		err = octeontx_mmc_adjust_tuning(mmc, a, a->opcode ?
+						 a->opcode : opcode);
+		if (err) {
+			pr_err("%s(%s, %u): tuning %s failed\n", __func__,
+			       dev->name, opcode, a->name);
+			return err;
+		}
+	}
+
+	octeontx_mmc_set_timing(mmc);
+	if (is_hs200)
+		slot->hs200_tuned = true;
+	else
+		slot->tuned = true;
+
+	if (slot->hs400_tuning_block != -1) {
+		struct mmc_cmd cmd;
+		struct mmc_data data;
+		u8 buffer[mmc->read_bl_len];
+
+		cmd.cmdidx = MMC_CMD_READ_SINGLE_BLOCK;
+		cmd.cmdarg = slot->hs400_tuning_block;
+		cmd.resp_type = MMC_RSP_R1;
+		data.dest = (void *)buffer;
+		data.blocks = 1;
+		data.blocksize = mmc->read_bl_len;
+		data.flags = MMC_DATA_READ;
+		err = octeontx_mmc_read_blocks(mmc, &cmd, &data, true) != 1;
+
+		if (err) {
+			printf("%s: Cannot read HS400 tuning block %u\n",
+			       dev->name, slot->hs400_tuning_block);
+			return err;
+		}
+		if (memcmp(buffer, octeontx_hs400_tuning_block,
+			   sizeof(buffer))) {
+			debug("%s(%s): Writing new HS400 tuning block to block %d\n",
+			      __func__, dev->name, slot->hs400_tuning_block);
+			cmd.cmdidx = MMC_CMD_WRITE_SINGLE_BLOCK;
+			data.src = (void *)octeontx_hs400_tuning_block;
+			data.flags = MMC_DATA_WRITE;
+			err = !octeontx_mmc_write_blocks(mmc, &cmd, &data);
+			if (err) {
+				printf("%s: Cannot write HS400 tuning block %u\n",
+				       dev->name, slot->hs400_tuning_block);
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+#else /* MMC_SUPPORTS_TUNING */
+static void octeontx_mmc_set_emm_timing(struct mmc *mmc,
+					union mio_emm_timing emm_timing)
+{
+}
+#endif /* MMC_SUPPORTS_TUNING */
+
+/**
+ * Calculate the clock period with rounding up
+ *
+ * @param	mmc	mmc device
+ * @return	clock period in system clocks for clk_lo + clk_hi
+ */
+static u32 octeontx_mmc_calc_clk_period(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct octeontx_mmc_host *host = slot->host;
+
+	return DIV_ROUND_UP(host->sys_freq, mmc->clock);
+}
+
+static int octeontx_mmc_set_ios(struct udevice *dev)
+{
+	struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+	struct mmc *mmc = &slot->mmc;
+	struct octeontx_mmc_host *host = slot->host;
+	union mio_emm_switch emm_switch;
+	union mio_emm_modex mode;
+	uint clock;
+	int bus_width = 0;
+	int clk_period = 0;
+	int power_class = 10;
+	int err = 0;
+	bool is_hs200 = false;
+	bool is_hs400 = false;
+
+	debug("%s(%s): Entry\n", __func__, dev->name);
+	debug("  clock: %u, bus width: %u, mode: %u\n", mmc->clock,
+	      mmc->bus_width, mmc->selected_mode);
+	debug("  host caps: 0x%x, card caps: 0x%x\n", mmc->host_caps,
+	      mmc->card_caps);
+	octeontx_mmc_switch_to(mmc);
+
+	clock = mmc->clock;
+	if (!clock)
+		clock = mmc->cfg->f_min;
+
+	switch (mmc->bus_width) {
+	case 8:
+		bus_width = 2;
+		break;
+	case 4:
+		bus_width = 1;
+		break;
+	case 1:
+		bus_width = 0;
+		break;
+	default:
+		pr_warn("%s(%s): Invalid bus width %d, defaulting to 1\n",
+			__func__, dev->name, mmc->bus_width);
+		bus_width = 0;
+	}
+
+	/* DDR is available for 4/8 bit bus width */
+	if (mmc->ddr_mode && bus_width)
+		bus_width |= 4;
+
+	debug("%s: sys_freq: %llu\n", __func__, host->sys_freq);
+	clk_period = octeontx_mmc_calc_clk_period(mmc);
+
+	emm_switch.u = 0;
+	emm_switch.s.bus_width = bus_width;
+	emm_switch.s.power_class = power_class;
+	emm_switch.s.clk_hi = clk_period / 2;
+	emm_switch.s.clk_lo = clk_period / 2;
+
+	debug("%s: last mode: %d, mode: %d, last clock: %u, clock: %u, ddr: %d\n",
+	      __func__, slot->last_mode, mmc->selected_mode,
+	      slot->last_clock, mmc->clock, mmc->ddr_mode);
+	switch (mmc->selected_mode) {
+	case MMC_LEGACY:
+		break;
+	case MMC_HS:
+	case SD_HS:
+	case MMC_HS_52:
+		emm_switch.s.hs_timing = 1;
+		break;
+	case MMC_HS_200:
+		is_hs200 = true;
+		fallthrough;
+	case UHS_SDR12:
+	case UHS_SDR25:
+	case UHS_SDR50:
+	case UHS_SDR104:
+		emm_switch.s.hs200_timing = 1;
+		break;
+	case MMC_HS_400:
+		is_hs400 = true;
+		fallthrough;
+	case UHS_DDR50:
+	case MMC_DDR_52:
+		emm_switch.s.hs400_timing = 1;
+		break;
+	default:
+		pr_err("%s(%s): Unsupported mode 0x%x\n", __func__, dev->name,
+		       mmc->selected_mode);
+		return -1;
+	}
+	emm_switch.s.bus_id = slot->bus_id;
+
+	if (!is_hs200 && !is_hs400 &&
+	    (mmc->selected_mode != slot->last_mode ||
+	     mmc->clock != slot->last_clock) &&
+	    !mmc->ddr_mode) {
+		slot->tuned = false;
+		slot->last_mode = mmc->selected_mode;
+		slot->last_clock = mmc->clock;
+	}
+
+	if (CONFIG_IS_ENABLED(MMC_VERBOSE)) {
+		debug("%s(%s): Setting bus mode to %s\n", __func__, dev->name,
+		      mmc_mode_name(mmc->selected_mode));
+	} else {
+		debug("%s(%s): Setting bus mode to 0x%x\n", __func__, dev->name,
+		      mmc->selected_mode);
+	}
+
+	debug(" Trying switch 0x%llx w%d hs:%d hs200:%d hs400:%d\n",
+	      emm_switch.u, emm_switch.s.bus_width, emm_switch.s.hs_timing,
+	      emm_switch.s.hs200_timing, emm_switch.s.hs400_timing);
+
+	set_wdog(mmc, 1000);
+	do_switch(mmc, emm_switch);
+	mdelay(100);
+	mode.u = read_csr(mmc, MIO_EMM_MODEX(slot->bus_id));
+	debug("%s(%s): mode: 0x%llx w:%d, hs:%d, hs200:%d, hs400:%d\n",
+	      __func__, dev->name, mode.u, mode.s.bus_width,
+	      mode.s.hs_timing, mode.s.hs200_timing, mode.s.hs400_timing);
+
+	err = octeontx_mmc_configure_delay(mmc);
+
+#ifdef MMC_SUPPORTS_TUNING
+	if (!err && mmc->selected_mode == MMC_HS_400 && !slot->hs400_tuned) {
+		debug("%s: Tuning HS400 mode\n", __func__);
+		err = octeontx_tune_hs400(mmc);
+	}
+#endif
+
+	return err;
+}
+
+/**
+ * Gets the status of the card detect pin
+ */
+static int octeontx_mmc_get_cd(struct udevice *dev)
+{
+	struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+	int val = 1;
+
+	if (dm_gpio_is_valid(&slot->cd_gpio)) {
+		val = dm_gpio_get_value(&slot->cd_gpio);
+		val ^= slot->cd_inverted;
+	}
+	debug("%s(%s): cd: %d\n", __func__, dev->name, val);
+	return val;
+}
+
+/**
+ * Gets the status of the write protect pin
+ */
+static int octeontx_mmc_get_wp(struct udevice *dev)
+{
+	struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+	int val = 0;
+
+	if (dm_gpio_is_valid(&slot->wp_gpio)) {
+		val = dm_gpio_get_value(&slot->wp_gpio);
+		val ^= slot->wp_inverted;
+	}
+	debug("%s(%s): wp: %d\n", __func__, dev->name, val);
+	return val;
+}
+
+static void octeontx_mmc_set_timing(struct mmc *mmc)
+{
+	union mio_emm_timing timing;
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+
+	switch (mmc->selected_mode) {
+	case MMC_HS_200:
+		timing = slot->hs200_taps;
+		break;
+	case MMC_HS_400:
+		timing = slot->hs400_tuned ?
+				slot->hs400_taps : slot->hs200_taps;
+		break;
+	default:
+		timing = slot->taps;
+		break;
+	}
+
+	debug("%s(%s):\n  cmd_in_tap: %u\n  cmd_out_tap: %u\n  data_in_tap: %u\n  data_out_tap: %u\n",
+	      __func__, mmc->dev->name, timing.s.cmd_in_tap,
+	      timing.s.cmd_out_tap, timing.s.data_in_tap,
+	      timing.s.data_out_tap);
+
+	octeontx_mmc_set_emm_timing(mmc, timing);
+}
+
+static int octeontx_mmc_configure_delay(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct octeontx_mmc_host *host __maybe_unused = slot->host;
+	bool __maybe_unused is_hs200;
+	bool __maybe_unused is_hs400;
+
+	debug("%s(%s)\n", __func__, mmc->dev->name);
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		union mio_emm_sample emm_sample;
+
+		emm_sample.u = 0;
+		emm_sample.s.cmd_cnt = slot->cmd_cnt;
+		emm_sample.s.dat_cnt = slot->dat_cnt;
+		write_csr(mmc, MIO_EMM_SAMPLE(), emm_sample.u);
+	} else {
+		is_hs200 = (mmc->selected_mode == MMC_HS_200);
+		is_hs400 = (mmc->selected_mode == MMC_HS_400);
+
+		if ((is_hs200 && slot->hs200_tuned) ||
+		    (is_hs400 && slot->hs400_tuned) ||
+		    (!is_hs200 && !is_hs400 && slot->tuned)) {
+			octeontx_mmc_set_output_bus_timing(mmc);
+		} else {
+			int half = MAX_NO_OF_TAPS / 2;
+			int dout, cout;
+
+			switch (mmc->selected_mode) {
+			case MMC_LEGACY:
+				if (IS_SD(mmc)) {
+					cout = MMC_SD_LEGACY_DEFAULT_CMD_OUT_TAP;
+					dout = MMC_SD_LEGACY_DEFAULT_DATA_OUT_TAP;
+				} else {
+					cout = MMC_LEGACY_DEFAULT_CMD_OUT_TAP;
+					dout = MMC_LEGACY_DEFAULT_DATA_OUT_TAP;
+				}
+				break;
+			case MMC_HS:
+				cout = MMC_HS_CMD_OUT_TAP;
+				dout = MMC_HS_DATA_OUT_TAP;
+				break;
+			case SD_HS:
+			case UHS_SDR12:
+			case UHS_SDR25:
+			case UHS_SDR50:
+				cout = MMC_SD_HS_CMD_OUT_TAP;
+				dout = MMC_SD_HS_DATA_OUT_TAP;
+				break;
+			case UHS_SDR104:
+			case UHS_DDR50:
+			case MMC_HS_52:
+			case MMC_DDR_52:
+				cout = MMC_DEFAULT_CMD_OUT_TAP;
+				dout = MMC_DEFAULT_DATA_OUT_TAP;
+				break;
+			case MMC_HS_200:
+				cout = -1;
+				dout = -1;
+				if (host->timing_calibrated) {
+					cout = octeontx2_mmc_calc_delay(
+						mmc, slot->cmd_out_hs200_delay);
+					dout = octeontx2_mmc_calc_delay(
+						mmc,
+						slot->data_out_hs200_delay);
+					debug("%s(%s): Calibrated HS200/HS400 cmd out delay: %dps tap: %d, data out delay: %d, tap: %d\n",
+					      __func__, mmc->dev->name,
+					      slot->cmd_out_hs200_delay, cout,
+					      slot->data_out_hs200_delay, dout);
+				} else {
+					cout = MMC_DEFAULT_HS200_CMD_OUT_TAP;
+					dout = MMC_DEFAULT_HS200_DATA_OUT_TAP;
+				}
+				is_hs200 = true;
+				break;
+			case MMC_HS_400:
+				cout = -1;
+				dout = -1;
+				if (host->timing_calibrated) {
+					if (slot->cmd_out_hs400_delay)
+						cout = octeontx2_mmc_calc_delay(
+							mmc,
+							slot->cmd_out_hs400_delay);
+					if (slot->data_out_hs400_delay)
+						dout = octeontx2_mmc_calc_delay(
+							mmc,
+							slot->data_out_hs400_delay);
+					debug("%s(%s): Calibrated HS200/HS400 cmd out delay: %dps tap: %d, data out delay: %d, tap: %d\n",
+					      __func__, mmc->dev->name,
+					      slot->cmd_out_hs400_delay, cout,
+					      slot->data_out_hs400_delay, dout);
+				} else {
+					cout = MMC_DEFAULT_HS400_CMD_OUT_TAP;
+					dout = MMC_DEFAULT_HS400_DATA_OUT_TAP;
+				}
+				is_hs400 = true;
+				break;
+			default:
+				pr_err("%s(%s): Invalid mode %d\n", __func__,
+				       mmc->dev->name, mmc->selected_mode);
+				return -1;
+			}
+			debug("%s(%s): Not tuned, hs200: %d, hs200 tuned: %d, hs400: %d, hs400 tuned: %d, tuned: %d\n",
+			      __func__, mmc->dev->name, is_hs200,
+			      slot->hs200_tuned,
+			      is_hs400, slot->hs400_tuned, slot->tuned);
+			/* Set some defaults */
+			if (is_hs200) {
+				slot->hs200_taps.u = 0;
+				slot->hs200_taps.s.cmd_out_tap = cout;
+				slot->hs200_taps.s.data_out_tap = dout;
+				slot->hs200_taps.s.cmd_in_tap = half;
+				slot->hs200_taps.s.data_in_tap = half;
+			} else if (is_hs400) {
+				slot->hs400_taps.u = 0;
+				slot->hs400_taps.s.cmd_out_tap = cout;
+				slot->hs400_taps.s.data_out_tap = dout;
+				slot->hs400_taps.s.cmd_in_tap = half;
+				slot->hs400_taps.s.data_in_tap = half;
+			} else {
+				slot->taps.u = 0;
+				slot->taps.s.cmd_out_tap = cout;
+				slot->taps.s.data_out_tap = dout;
+				slot->taps.s.cmd_in_tap = half;
+				slot->taps.s.data_in_tap = half;
+			}
+		}
+
+		if (is_hs200)
+			debug("%s(%s): hs200 taps: ci: %u, co: %u, di: %u, do: %u\n",
+			      __func__, mmc->dev->name,
+			      slot->hs200_taps.s.cmd_in_tap,
+			      slot->hs200_taps.s.cmd_out_tap,
+			      slot->hs200_taps.s.data_in_tap,
+			      slot->hs200_taps.s.data_out_tap);
+		else if (is_hs400)
+			debug("%s(%s): hs400 taps: ci: %u, co: %u, di: %u, do: %u\n",
+			      __func__, mmc->dev->name,
+			      slot->hs400_taps.s.cmd_in_tap,
+			      slot->hs400_taps.s.cmd_out_tap,
+			      slot->hs400_taps.s.data_in_tap,
+			      slot->hs400_taps.s.data_out_tap);
+		else
+			debug("%s(%s): taps: ci: %u, co: %u, di: %u, do: %u\n",
+			      __func__, mmc->dev->name, slot->taps.s.cmd_in_tap,
+			      slot->taps.s.cmd_out_tap,
+			      slot->taps.s.data_in_tap,
+			      slot->taps.s.data_out_tap);
+		octeontx_mmc_set_timing(mmc);
+		debug("%s: Done\n", __func__);
+	}
+
+	return 0;
+}
+
+/**
+ * Sets the MMC watchdog timer in microseconds
+ *
+ * @param	mmc	mmc device
+ * @param	us	timeout in microseconds, 0 for maximum timeout
+ */
+static void set_wdog(struct mmc *mmc, u64 us)
+{
+	union mio_emm_wdog wdog;
+	u64 val;
+
+	val = (us * mmc->clock) / 1000000;
+	if (val >= (1 << 26) || !us) {
+		if (us)
+			pr_debug("%s: warning: timeout %llu exceeds max value %llu, truncating\n",
+				 __func__, us,
+				 (u64)(((1ULL << 26) - 1) * 1000000ULL) /
+					mmc->clock);
+		val = (1 << 26) - 1;
+	}
+	wdog.u = 0;
+	wdog.s.clk_cnt = val;
+	write_csr(mmc, MIO_EMM_WDOG(), wdog.u);
+}
+
+/**
+ * Set the IO drive strength and slew
+ *
+ * @param	mmc	mmc device
+ */
+static void octeontx_mmc_io_drive_setup(struct mmc *mmc)
+{
+	if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+		union mio_emm_io_ctl io_ctl;
+
+		if (slot->drive < 0 || slot->slew < 0)
+			return;
+
+		io_ctl.u = 0;
+		io_ctl.s.drive = slot->drive;
+		io_ctl.s.slew = slot->slew;
+		write_csr(mmc, MIO_EMM_IO_CTL(), io_ctl.u);
+	}
+}
+
+/**
+ * Print switch errors
+ *
+ * @param	mmc	mmc device
+ */
+static void check_switch_errors(struct mmc *mmc)
+{
+	union mio_emm_switch emm_switch;
+
+	emm_switch.u = read_csr(mmc, MIO_EMM_SWITCH());
+	if (emm_switch.s.switch_err0)
+		pr_err("%s: Switch power class error\n", mmc->cfg->name);
+	if (emm_switch.s.switch_err1)
+		pr_err("%s: Switch HS timing error\n", mmc->cfg->name);
+	if (emm_switch.s.switch_err2)
+		pr_err("%s: Switch bus width error\n", mmc->cfg->name);
+}
+
+static void do_switch(struct mmc *mmc, union mio_emm_switch emm_switch)
+{
+	union mio_emm_rsp_sts rsp_sts;
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	int bus_id = emm_switch.s.bus_id;
+	ulong start;
+
+	if (emm_switch.s.bus_id != 0) {
+		emm_switch.s.bus_id = 0;
+		write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+		udelay(100);
+		emm_switch.s.bus_id = bus_id;
+	}
+	debug("%s(%s, 0x%llx)\n", __func__, mmc->dev->name, emm_switch.u);
+	write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+
+	start = get_timer(0);
+	do {
+		rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+		if (!rsp_sts.s.switch_val)
+			break;
+		udelay(100);
+	} while (get_timer(start) < 10);
+	if (rsp_sts.s.switch_val) {
+		pr_warn("%s(%s): Warning: writing 0x%llx to emm_switch timed out, status: 0x%llx\n",
+			__func__, mmc->dev->name, emm_switch.u, rsp_sts.u);
+	}
+	slot->cached_switch = emm_switch;
+	check_switch_errors(mmc);
+	slot->cached_switch.u = emm_switch.u;
+	debug("%s: emm_switch: 0x%llx, rsp_lo: 0x%llx\n",
+	      __func__, read_csr(mmc, MIO_EMM_SWITCH()),
+				 read_csr(mmc, MIO_EMM_RSP_LO()));
+}
+
+/**
+ * Given a delay in ps, return the tap delay count
+ *
+ * @param	mmc	mmc data structure
+ * @param	delay	delay in picoseconds
+ *
+ * @return	Number of tap cycles or error if -1
+ */
+static int octeontx2_mmc_calc_delay(struct mmc *mmc, int delay)
+{
+	struct octeontx_mmc_host *host = mmc_to_host(mmc);
+
+	if (host->is_asim || host->is_emul)
+		return 63;
+
+	if (!host->timing_taps) {
+		pr_err("%s(%s): Error: host timing not calibrated\n",
+		       __func__, mmc->dev->name);
+		return -1;
+	}
+	debug("%s(%s, %d) timing taps: %llu\n", __func__, mmc->dev->name,
+	      delay, host->timing_taps);
+	return min_t(int, DIV_ROUND_UP(delay, host->timing_taps), 63);
+}
+
+/**
+ * Calibrates the delay based on the internal clock
+ *
+ * @param	mmc	Pointer to mmc data structure
+ *
+ * @return	0 for success or -ETIMEDOUT on error
+ *
+ * NOTE: On error a default value will be calculated.
+ */
+static int octeontx_mmc_calibrate_delay(struct mmc *mmc)
+{
+	union mio_emm_calb emm_calb;
+	union mio_emm_tap emm_tap;
+	union mio_emm_cfg emm_cfg;
+	union mio_emm_io_ctl emm_io_ctl;
+	union mio_emm_switch emm_switch;
+	union mio_emm_wdog emm_wdog;
+	union mio_emm_sts_mask emm_sts_mask;
+	union mio_emm_debug emm_debug;
+	union mio_emm_timing emm_timing;
+	struct octeontx_mmc_host *host = mmc_to_host(mmc);
+	ulong start;
+	u8 bus_id, bus_ena;
+
+	debug("%s: Calibrating delay\n", __func__);
+	if (host->is_asim || host->is_emul) {
+		debug("  No calibration for ASIM\n");
+		return 0;
+	}
+	emm_tap.u = 0;
+	if (host->calibrate_glitch) {
+		emm_tap.s.delay = MMC_DEFAULT_TAP_DELAY;
+	} else {
+		/* Save registers */
+		emm_cfg.u = read_csr(mmc, MIO_EMM_CFG());
+		emm_io_ctl.u = read_csr(mmc, MIO_EMM_IO_CTL());
+		emm_switch.u = read_csr(mmc, MIO_EMM_SWITCH());
+		emm_wdog.u = read_csr(mmc, MIO_EMM_WDOG());
+		emm_sts_mask.u = read_csr(mmc, MIO_EMM_STS_MASK());
+		emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+		emm_timing.u = read_csr(mmc, MIO_EMM_TIMING());
+		bus_ena = emm_cfg.s.bus_ena;
+		bus_id = emm_switch.s.bus_id;
+		emm_cfg.s.bus_ena = 0;
+		write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+		udelay(1);
+		emm_cfg.s.bus_ena = 1ULL << 3;
+		write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+		mdelay(1);
+		emm_calb.u = 0;
+		write_csr(mmc, MIO_EMM_CALB(), emm_calb.u);
+		emm_calb.s.start = 1;
+		write_csr(mmc, MIO_EMM_CALB(), emm_calb.u);
+		start = get_timer(0);
+		/* This should only take 3 microseconds */
+		do {
+			udelay(5);
+			emm_tap.u = read_csr(mmc, MIO_EMM_TAP());
+		} while (!emm_tap.s.delay && get_timer(start) < 10);
+
+		emm_calb.s.start = 0;
+		write_csr(mmc, MIO_EMM_CALB(), emm_calb.u);
+
+		emm_cfg.s.bus_ena = 0;
+		write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+		udelay(1);
+		/* Restore registers */
+		emm_cfg.s.bus_ena = bus_ena;
+		write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+		if (host->tap_requires_noclk) {
+			/* Turn off the clock */
+			emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+			emm_debug.s.emmc_clk_disable = 1;
+			write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+			udelay(1);
+			emm_debug.s.rdsync_rst = 1;
+			write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+			udelay(1);
+		}
+
+		write_csr(mmc, MIO_EMM_TIMING(), emm_timing.u);
+		if (host->tap_requires_noclk) {
+			/* Turn the clock back on */
+			udelay(1);
+			emm_debug.s.rdsync_rst = 0;
+			write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+			udelay(1);
+			emm_debug.s.emmc_clk_disable = 0;
+			write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+		}
+		udelay(1);
+		write_csr(mmc, MIO_EMM_IO_CTL(), emm_io_ctl.u);
+		bus_id = emm_switch.s.bus_id;
+		emm_switch.s.bus_id = 0;
+		write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+		emm_switch.s.bus_id = bus_id;
+		write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+		write_csr(mmc, MIO_EMM_WDOG(), emm_wdog.u);
+		write_csr(mmc, MIO_EMM_STS_MASK(), emm_sts_mask.u);
+		write_csr(mmc, MIO_EMM_RCA(), mmc->rca);
+		write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+
+		if (!emm_tap.s.delay) {
+			pr_err("%s: Error: delay calibration failed, timed out.\n",
+			       __func__);
+			/* Set to default value if timed out */
+			emm_tap.s.delay = MMC_DEFAULT_TAP_DELAY;
+			return -ETIMEDOUT;
+		}
+	}
+	/* Round up */
+	host->timing_taps = (10 * 1000 * emm_tap.s.delay) / TOTAL_NO_OF_TAPS;
+	debug("%s(%s): timing taps: %llu, delay: %u\n",
+	      __func__, mmc->dev->name, host->timing_taps, emm_tap.s.delay);
+	host->timing_calibrated = true;
+	return 0;
+}
+
+static int octeontx_mmc_set_input_bus_timing(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+		union mio_emm_sample sample;
+
+		sample.u = 0;
+		sample.s.cmd_cnt = slot->cmd_clk_skew;
+		sample.s.dat_cnt = slot->dat_clk_skew;
+		write_csr(mmc, MIO_EMM_SAMPLE(), sample.u);
+	} else {
+		union mio_emm_timing timing;
+
+		timing.u = read_csr(mmc, MIO_EMM_TIMING());
+		if (mmc->selected_mode == MMC_HS_200) {
+			if (slot->hs200_tuned) {
+				timing.s.cmd_in_tap =
+					slot->hs200_taps.s.cmd_in_tap;
+				timing.s.data_in_tap =
+					slot->hs200_taps.s.data_in_tap;
+			} else {
+				pr_warn("%s(%s): Warning: hs200 timing not tuned\n",
+					__func__, mmc->dev->name);
+				timing.s.cmd_in_tap =
+					MMC_DEFAULT_HS200_CMD_IN_TAP;
+				timing.s.data_in_tap =
+					MMC_DEFAULT_HS200_DATA_IN_TAP;
+			}
+		} else if (mmc->selected_mode == MMC_HS_400) {
+			if (slot->hs400_tuned) {
+				timing.s.cmd_in_tap =
+					slot->hs400_taps.s.cmd_in_tap;
+				timing.s.data_in_tap =
+					slot->hs400_taps.s.data_in_tap;
+			} else if (slot->hs200_tuned) {
+				timing.s.cmd_in_tap =
+					slot->hs200_taps.s.cmd_in_tap;
+				timing.s.data_in_tap =
+					slot->hs200_taps.s.data_in_tap;
+			} else {
+				pr_warn("%s(%s): Warning: hs400 timing not tuned\n",
+					__func__, mmc->dev->name);
+				timing.s.cmd_in_tap =
+					MMC_DEFAULT_HS200_CMD_IN_TAP;
+				timing.s.data_in_tap =
+					MMC_DEFAULT_HS200_DATA_IN_TAP;
+			}
+		} else if (slot->tuned) {
+			timing.s.cmd_in_tap = slot->taps.s.cmd_in_tap;
+			timing.s.data_in_tap = slot->taps.s.data_in_tap;
+		} else {
+			timing.s.cmd_in_tap = MMC_DEFAULT_CMD_IN_TAP;
+			timing.s.data_in_tap = MMC_DEFAULT_DATA_IN_TAP;
+		}
+		octeontx_mmc_set_emm_timing(mmc, timing);
+	}
+
+	return 0;
+}
+
+/**
+ * Sets the default bus timing for the current mode.
+ *
+ * @param	mmc	mmc data structure
+ *
+ * @return	0 for success, error otherwise
+ */
+static int octeontx_mmc_set_output_bus_timing(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	union mio_emm_timing timing;
+	int cout_bdelay, dout_bdelay;
+	unsigned int cout_delay, dout_delay;
+	char env_name[32];
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX))
+		return 0;
+
+	debug("%s(%s)\n", __func__, mmc->dev->name);
+	if (slot->is_asim || slot->is_emul)
+		return 0;
+
+	octeontx_mmc_calibrate_delay(mmc);
+
+	if (mmc->clock < 26000000) {
+		cout_delay = 5000;
+		dout_delay = 5000;
+	} else if (mmc->clock <= 52000000) {
+		cout_delay = 2500;
+		dout_delay = 2500;
+	} else if (!mmc_is_mode_ddr(mmc->selected_mode)) {
+		cout_delay = slot->cmd_out_hs200_delay;
+		dout_delay = slot->data_out_hs200_delay;
+	} else {
+		cout_delay = slot->cmd_out_hs400_delay;
+		dout_delay = slot->data_out_hs400_delay;
+	}
+
+	snprintf(env_name, sizeof(env_name), "mmc%d_hs200_dout_delay_ps",
+		 slot->bus_id);
+	dout_delay = env_get_ulong(env_name, 10, dout_delay);
+	debug("%s: dout_delay: %u\n", __func__, dout_delay);
+
+	cout_bdelay = octeontx2_mmc_calc_delay(mmc, cout_delay);
+	dout_bdelay = octeontx2_mmc_calc_delay(mmc, dout_delay);
+
+	debug("%s: cmd output delay: %u, data output delay: %u, cmd bdelay: %d, data bdelay: %d, clock: %d\n",
+	      __func__, cout_delay, dout_delay, cout_bdelay, dout_bdelay,
+	      mmc->clock);
+	if (cout_bdelay < 0 || dout_bdelay < 0) {
+		pr_err("%s: Error: could not calculate command and/or data clock skew\n",
+		       __func__);
+		return -1;
+	}
+	timing.u = read_csr(mmc, MIO_EMM_TIMING());
+	timing.s.cmd_out_tap = cout_bdelay;
+	timing.s.data_out_tap = dout_bdelay;
+	if (mmc->selected_mode == MMC_HS_200) {
+		slot->hs200_taps.s.cmd_out_tap = cout_bdelay;
+		slot->hs200_taps.s.data_out_tap = dout_bdelay;
+	} else if (mmc->selected_mode == MMC_HS_400) {
+		slot->hs400_taps.s.cmd_out_tap = cout_bdelay;
+		slot->hs400_taps.s.data_out_tap = dout_bdelay;
+	} else {
+		slot->taps.s.cmd_out_tap = cout_bdelay;
+		slot->taps.s.data_out_tap = dout_bdelay;
+	}
+	octeontx_mmc_set_emm_timing(mmc, timing);
+	debug("%s(%s): bdelay: %d/%d, clock: %d, ddr: %s, timing taps: %llu, do: %d, di: %d, co: %d, ci: %d\n",
+	      __func__, mmc->dev->name, cout_bdelay, dout_bdelay, mmc->clock,
+	      mmc->ddr_mode ? "yes" : "no",
+	      mmc_to_host(mmc)->timing_taps,
+	      timing.s.data_out_tap,
+	      timing.s.data_in_tap,
+	      timing.s.cmd_out_tap,
+	      timing.s.cmd_in_tap);
+
+	return 0;
+}
+
+static void octeontx_mmc_set_clock(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	uint clock;
+
+	clock = min(mmc->cfg->f_max, (uint)slot->clock);
+	clock = max(mmc->cfg->f_min, clock);
+	debug("%s(%s): f_min: %u, f_max: %u, clock: %u\n", __func__,
+	      mmc->dev->name, mmc->cfg->f_min, mmc->cfg->f_max, clock);
+	slot->clock = clock;
+	mmc->clock = clock;
+}
+
+/**
+ * This switches I/O power as needed when switching between slots.
+ *
+ * @param	mmc	mmc data structure
+ */
+static void octeontx_mmc_switch_io(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct octeontx_mmc_host *host = slot->host;
+	struct mmc *last_mmc = host->last_mmc;
+	static struct udevice *last_reg;
+	union mio_emm_cfg emm_cfg;
+	int bus;
+	static bool initialized;
+
+	/* First time? */
+	if (!initialized || mmc != host->last_mmc) {
+		struct mmc *ommc;
+
+		/* Switch to bus 3 which is unused */
+		emm_cfg.u = read_csr(mmc, MIO_EMM_CFG());
+		emm_cfg.s.bus_ena = 1 << 3;
+		write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+
+		/* Turn off all other I/O interfaces with first initialization
+		 * if@least one supply was found.
+		 */
+		for (bus = 0; bus <= OCTEONTX_MAX_MMC_SLOT; bus++) {
+			ommc = &host->slots[bus].mmc;
+
+			/* Handle self case later */
+			if (ommc == mmc || !ommc->vqmmc_supply)
+				continue;
+
+			/* Skip if we're not switching regulators */
+			if (last_reg == mmc->vqmmc_supply)
+				continue;
+
+			/* Turn off other regulators */
+			if (ommc->vqmmc_supply != mmc->vqmmc_supply)
+				regulator_set_enable(ommc->vqmmc_supply, false);
+		}
+		/* Turn ourself on */
+		if (mmc->vqmmc_supply && last_reg != mmc->vqmmc_supply)
+			regulator_set_enable(mmc->vqmmc_supply, true);
+		mdelay(1);	/* Settle time */
+		/* Switch to new bus */
+		emm_cfg.s.bus_ena = 1 << slot->bus_id;
+		write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+		last_reg = mmc->vqmmc_supply;
+		initialized = true;
+		return;
+	}
+
+	/* No change in device */
+	if (last_mmc == mmc)
+		return;
+
+	if (!last_mmc) {
+		pr_warn("%s(%s): No previous slot detected in IO slot switch!\n",
+			__func__, mmc->dev->name);
+		return;
+	}
+
+	debug("%s(%s): last: %s, supply: %p\n", __func__, mmc->dev->name,
+	      last_mmc->dev->name, mmc->vqmmc_supply);
+
+	/* The supply is the same so we do nothing */
+	if (last_mmc->vqmmc_supply == mmc->vqmmc_supply)
+		return;
+
+	/* Turn off the old slot I/O supply */
+	if (last_mmc->vqmmc_supply) {
+		debug("%s(%s): Turning off IO to %s, supply: %s\n",
+		      __func__, mmc->dev->name, last_mmc->dev->name,
+		      last_mmc->vqmmc_supply->name);
+		regulator_set_enable(last_mmc->vqmmc_supply, false);
+	}
+	/* Turn on the new slot I/O supply */
+	if (mmc->vqmmc_supply)  {
+		debug("%s(%s): Turning on IO to slot %d, supply: %s\n",
+		      __func__, mmc->dev->name, slot->bus_id,
+		      mmc->vqmmc_supply->name);
+		regulator_set_enable(mmc->vqmmc_supply, true);
+	}
+	/* Allow power to settle */
+	mdelay(1);
+}
+
+/**
+ * Called to switch between mmc devices
+ *
+ * @param	mmc	new mmc device
+ */
+static void octeontx_mmc_switch_to(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct octeontx_mmc_slot *old_slot;
+	struct octeontx_mmc_host *host = slot->host;
+	union mio_emm_switch emm_switch;
+	union mio_emm_sts_mask emm_sts_mask;
+	union mio_emm_rca emm_rca;
+
+	if (slot->bus_id == host->last_slotid)
+		return;
+
+	debug("%s(%s) switching from slot %d to slot %d\n", __func__,
+	      mmc->dev->name, host->last_slotid, slot->bus_id);
+	octeontx_mmc_switch_io(mmc);
+
+	if (host->last_slotid >= 0 && slot->valid) {
+		old_slot = &host->slots[host->last_slotid];
+		old_slot->cached_switch.u = read_csr(mmc, MIO_EMM_SWITCH());
+		old_slot->cached_rca.u = read_csr(mmc, MIO_EMM_RCA());
+	}
+	if (mmc->rca)
+		write_csr(mmc, MIO_EMM_RCA(), mmc->rca);
+	emm_switch = slot->cached_switch;
+	do_switch(mmc, emm_switch);
+	emm_rca.u = 0;
+	emm_rca.s.card_rca = mmc->rca;
+	write_csr(mmc, MIO_EMM_RCA(), emm_rca.u);
+	mdelay(100);
+
+	set_wdog(mmc, 100000);
+	if (octeontx_mmc_set_output_bus_timing(mmc) ||
+	    octeontx_mmc_set_input_bus_timing(mmc))
+		pr_err("%s(%s): Error setting bus timing\n", __func__,
+		       mmc->dev->name);
+	octeontx_mmc_io_drive_setup(mmc);
+
+	emm_sts_mask.u = 0;
+	emm_sts_mask.s.sts_msk = 1 << 7 | 1 << 22 | 1 << 23 | 1 << 19;
+	write_csr(mmc, MIO_EMM_STS_MASK(), emm_sts_mask.u);
+	host->last_slotid = slot->bus_id;
+	host->last_mmc = mmc;
+	mdelay(10);
+}
+
+/**
+ * Perform initial timing configuration
+ *
+ * @param mmc	mmc device
+ *
+ * @return 0 for success
+ *
+ * NOTE: This will need to be updated when new silicon comes out
+ */
+static int octeontx_mmc_init_timing(struct mmc *mmc)
+{
+	union mio_emm_timing timing;
+
+	if (mmc_to_slot(mmc)->is_asim || mmc_to_slot(mmc)->is_emul)
+		return 0;
+
+	debug("%s(%s)\n", __func__, mmc->dev->name);
+	timing.u = 0;
+	timing.s.cmd_out_tap = MMC_DEFAULT_CMD_OUT_TAP;
+	timing.s.data_out_tap = MMC_DEFAULT_DATA_OUT_TAP;
+	timing.s.cmd_in_tap = MMC_DEFAULT_CMD_IN_TAP;
+	timing.s.data_in_tap = MMC_DEFAULT_DATA_IN_TAP;
+	octeontx_mmc_set_emm_timing(mmc, timing);
+	return 0;
+}
+
+/**
+ * Perform low-level initialization
+ *
+ * @param	mmc	mmc device
+ *
+ * @return	0 for success, error otherwise
+ */
+static int octeontx_mmc_init_lowlevel(struct mmc *mmc)
+{
+	struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+	struct octeontx_mmc_host *host = slot->host;
+	union mio_emm_switch emm_switch;
+	u32 clk_period;
+
+	debug("%s(%s): lowlevel init for slot %d\n", __func__,
+	      mmc->dev->name, slot->bus_id);
+	host->emm_cfg.s.bus_ena &= ~(1 << slot->bus_id);
+	write_csr(mmc, MIO_EMM_CFG(), host->emm_cfg.u);
+	udelay(100);
+	host->emm_cfg.s.bus_ena |= 1 << slot->bus_id;
+	write_csr(mmc, MIO_EMM_CFG(), host->emm_cfg.u);
+	udelay(10);
+	slot->clock = mmc->cfg->f_min;
+	octeontx_mmc_set_clock(&slot->mmc);
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+		if (host->cond_clock_glitch) {
+			union mio_emm_debug emm_debug;
+
+			emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+			emm_debug.s.clk_on = 1;
+			write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+		}
+		octeontx_mmc_calibrate_delay(&slot->mmc);
+	}
+
+	clk_period = octeontx_mmc_calc_clk_period(mmc);
+	emm_switch.u = 0;
+	emm_switch.s.power_class = 10;
+	emm_switch.s.clk_lo = clk_period / 2;
+	emm_switch.s.clk_hi = clk_period / 2;
+
+	emm_switch.s.bus_id = slot->bus_id;
+	debug("%s: Performing switch\n", __func__);
+	do_switch(mmc, emm_switch);
+	slot->cached_switch.u = emm_switch.u;
+
+	if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX))
+		octeontx_mmc_init_timing(mmc);
+
+	set_wdog(mmc, 1000000); /* Set to 1 second */
+	write_csr(mmc, MIO_EMM_STS_MASK(), 0xe4390080ull);
+	write_csr(mmc, MIO_EMM_RCA(), 1);
+	mdelay(10);
+	debug("%s: done\n", __func__);
+	return 0;
+}
+
+/**
+ * Translates a voltage number to bits in MMC register
+ *
+ * @param	voltage	voltage in microvolts
+ *
+ * @return	MMC register value for voltage
+ */
+static u32 xlate_voltage(u32 voltage)
+{
+	u32 volt = 0;
+
+	/* Convert to millivolts */
+	voltage /= 1000;
+	if (voltage >= 1650 && voltage <= 1950)
+		volt |= MMC_VDD_165_195;
+	if (voltage >= 2000 && voltage <= 2100)
+		volt |= MMC_VDD_20_21;
+	if (voltage >= 2100 && voltage <= 2200)
+		volt |= MMC_VDD_21_22;
+	if (voltage >= 2200 && voltage <= 2300)
+		volt |= MMC_VDD_22_23;
+	if (voltage >= 2300 && voltage <= 2400)
+		volt |= MMC_VDD_23_24;
+	if (voltage >= 2400 && voltage <= 2500)
+		volt |= MMC_VDD_24_25;
+	if (voltage >= 2500 && voltage <= 2600)
+		volt |= MMC_VDD_25_26;
+	if (voltage >= 2600 && voltage <= 2700)
+		volt |= MMC_VDD_26_27;
+	if (voltage >= 2700 && voltage <= 2800)
+		volt |= MMC_VDD_27_28;
+	if (voltage >= 2800 && voltage <= 2900)
+		volt |= MMC_VDD_28_29;
+	if (voltage >= 2900 && voltage <= 3000)
+		volt |= MMC_VDD_29_30;
+	if (voltage >= 3000 && voltage <= 3100)
+		volt |= MMC_VDD_30_31;
+	if (voltage >= 3100 && voltage <= 3200)
+		volt |= MMC_VDD_31_32;
+	if (voltage >= 3200 && voltage <= 3300)
+		volt |= MMC_VDD_32_33;
+	if (voltage >= 3300 && voltage <= 3400)
+		volt |= MMC_VDD_33_34;
+	if (voltage >= 3400 && voltage <= 3500)
+		volt |= MMC_VDD_34_35;
+	if (voltage >= 3500 && voltage <= 3600)
+		volt |= MMC_VDD_35_36;
+
+	return volt;
+}
+
+/**
+ * Check if a slot is valid in the device tree
+ *
+ * @param	dev	slot device to check
+ *
+ * @return	true if status reports "ok" or "okay" or if no status,
+ *		false otherwise.
+ */
+static bool octeontx_mmc_get_valid(struct udevice *dev)
+{
+	const char *stat = ofnode_read_string(dev->node, "status");
+
+	if (!stat || !strncmp(stat, "ok", 2))
+		return true;
+	else
+		return false;
+}
+
+/**
+ * Reads slot configuration from the device tree
+ *
+ * @param	dev	slot device
+ *
+ * @return	0 on success, otherwise error
+ */
+static int octeontx_mmc_get_config(struct udevice *dev)
+{
+	struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+	uint voltages[2];
+	uint low, high;
+	char env_name[32];
+	int err;
+	ofnode node = dev->node;
+	int bus_width = 1;
+	ulong new_max_freq;
+
+	debug("%s(%s)", __func__, dev->name);
+	slot->cfg.name = dev->name;
+
+	slot->cfg.f_max = ofnode_read_s32_default(dev->node, "max-frequency",
+						  26000000);
+	snprintf(env_name, sizeof(env_name), "mmc_max_frequency%d",
+		 slot->bus_id);
+
+	new_max_freq = env_get_ulong(env_name, 10, slot->cfg.f_max);
+	debug("Reading %s, got %lu\n", env_name, new_max_freq);
+
+	if (new_max_freq != slot->cfg.f_max) {
+		printf("Overriding device tree MMC maximum frequency %u to %lu\n",
+		       slot->cfg.f_max, new_max_freq);
+		slot->cfg.f_max = new_max_freq;
+	}
+	slot->cfg.f_min = 400000;
+	slot->cfg.b_max = CONFIG_SYS_MMC_MAX_BLK_COUNT;
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+		slot->hs400_tuning_block =
+			ofnode_read_s32_default(dev->node,
+						"marvell,hs400-tuning-block",
+						-1);
+		debug("%s(%s): mmc HS400 tuning block: %d\n", __func__,
+		      dev->name, slot->hs400_tuning_block);
+
+		slot->hs200_tap_adj =
+			ofnode_read_s32_default(dev->node,
+						"marvell,hs200-tap-adjust", 0);
+		debug("%s(%s): hs200-tap-adjust: %d\n", __func__, dev->name,
+		      slot->hs200_tap_adj);
+		slot->hs400_tap_adj =
+			ofnode_read_s32_default(dev->node,
+						"marvell,hs400-tap-adjust", 0);
+		debug("%s(%s): hs400-tap-adjust: %d\n", __func__, dev->name,
+		      slot->hs400_tap_adj);
+	}
+
+	err = ofnode_read_u32_array(dev->node, "voltage-ranges", voltages, 2);
+	if (err) {
+		slot->cfg.voltages = MMC_VDD_32_33 | MMC_VDD_33_34;
+	} else {
+		low = xlate_voltage(voltages[0]);
+		high = xlate_voltage(voltages[1]);
+		debug("  low voltage: 0x%x (%u), high: 0x%x (%u)\n",
+		      low, voltages[0], high, voltages[1]);
+		if (low > high || !low || !high) {
+			pr_err("Invalid MMC voltage range [%u-%u] specified for %s\n",
+			       low, high, dev->name);
+			return -1;
+		}
+		slot->cfg.voltages = 0;
+		do {
+			slot->cfg.voltages |= low;
+			low <<= 1;
+		} while (low <= high);
+	}
+	debug("%s: config voltages: 0x%x\n", __func__, slot->cfg.voltages);
+	slot->slew = ofnode_read_s32_default(node, "cavium,clk-slew", -1);
+	slot->drive = ofnode_read_s32_default(node, "cavium,drv-strength", -1);
+	gpio_request_by_name(dev, "cd-gpios", 0, &slot->cd_gpio, GPIOD_IS_IN);
+	slot->cd_inverted = ofnode_read_bool(node, "cd-inverted");
+	gpio_request_by_name(dev, "wp-gpios", 0, &slot->wp_gpio, GPIOD_IS_IN);
+	slot->wp_inverted = ofnode_read_bool(node, "wp-inverted");
+	if (slot->cfg.voltages & MMC_VDD_165_195) {
+		slot->is_1_8v = true;
+		slot->is_3_3v = false;
+	} else if (slot->cfg.voltages & (MMC_VDD_30_31 | MMC_VDD_31_32 |
+					 MMC_VDD_33_34 | MMC_VDD_34_35 |
+					 MMC_VDD_35_36)) {
+		slot->is_1_8v = false;
+		slot->is_3_3v = true;
+	}
+
+	bus_width = ofnode_read_u32_default(node, "bus-width", 1);
+	/* Note fall-through */
+	switch (bus_width) {
+	case 8:
+		slot->cfg.host_caps |= MMC_MODE_8BIT;
+	case 4:
+		slot->cfg.host_caps |= MMC_MODE_4BIT;
+	case 1:
+		slot->cfg.host_caps |= MMC_MODE_1BIT;
+		break;
+	}
+	if (ofnode_read_bool(node, "no-1-8-v")) {
+		slot->is_3_3v = true;
+		slot->is_1_8v = false;
+		if (!(slot->cfg.voltages & (MMC_VDD_32_33 | MMC_VDD_33_34)))
+			pr_warn("%s(%s): voltages indicate 3.3v but 3.3v not supported\n",
+				__func__, dev->name);
+	}
+	if (ofnode_read_bool(node, "mmc-ddr-3-3v")) {
+		slot->is_3_3v = true;
+		slot->is_1_8v = false;
+		if (!(slot->cfg.voltages & (MMC_VDD_32_33 | MMC_VDD_33_34)))
+			pr_warn("%s(%s): voltages indicate 3.3v but 3.3v not supported\n",
+				__func__, dev->name);
+	}
+	if (ofnode_read_bool(node, "cap-sd-highspeed") ||
+	    ofnode_read_bool(node, "cap-mmc-highspeed") ||
+	    ofnode_read_bool(node, "sd-uhs-sdr25"))
+		slot->cfg.host_caps |= MMC_MODE_HS;
+	if (slot->cfg.f_max >= 50000000 &&
+	    slot->cfg.host_caps & MMC_MODE_HS)
+		slot->cfg.host_caps |= MMC_MODE_HS_52MHz | MMC_MODE_HS;
+	if (ofnode_read_bool(node, "sd-uhs-sdr50"))
+		slot->cfg.host_caps |= MMC_MODE_HS_52MHz | MMC_MODE_HS;
+	if (ofnode_read_bool(node, "sd-uhs-ddr50"))
+		slot->cfg.host_caps |= MMC_MODE_HS | MMC_MODE_HS_52MHz |
+				       MMC_MODE_DDR_52MHz;
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+		if (!slot->is_asim && !slot->is_emul) {
+			if (ofnode_read_bool(node, "mmc-hs200-1_8v"))
+				slot->cfg.host_caps |= MMC_MODE_HS200 |
+					MMC_MODE_HS_52MHz;
+			if (ofnode_read_bool(node, "mmc-hs400-1_8v"))
+				slot->cfg.host_caps |= MMC_MODE_HS400 |
+					MMC_MODE_HS_52MHz |
+					MMC_MODE_HS200 |
+					MMC_MODE_DDR_52MHz;
+			slot->cmd_out_hs200_delay =
+				ofnode_read_u32_default(node,
+					"marvell,cmd-out-hs200-dly",
+					MMC_DEFAULT_HS200_CMD_OUT_DLY);
+			debug("%s(%s): HS200 cmd out delay: %d\n",
+			      __func__, dev->name, slot->cmd_out_hs200_delay);
+			slot->data_out_hs200_delay =
+				ofnode_read_u32_default(node,
+					"marvell,data-out-hs200-dly",
+					MMC_DEFAULT_HS200_DATA_OUT_DLY);
+			debug("%s(%s): HS200 data out delay: %d\n",
+			      __func__, dev->name, slot->data_out_hs200_delay);
+			slot->cmd_out_hs400_delay =
+				ofnode_read_u32_default(node,
+					"marvell,cmd-out-hs400-dly",
+					MMC_DEFAULT_HS400_CMD_OUT_DLY);
+			debug("%s(%s): HS400 cmd out delay: %d\n",
+			      __func__, dev->name, slot->cmd_out_hs400_delay);
+			slot->data_out_hs400_delay =
+				ofnode_read_u32_default(node,
+					"marvell,data-out-hs400-dly",
+					MMC_DEFAULT_HS400_DATA_OUT_DLY);
+			debug("%s(%s): HS400 data out delay: %d\n",
+			      __func__, dev->name, slot->data_out_hs400_delay);
+		}
+	}
+
+	slot->disable_ddr = ofnode_read_bool(node, "marvell,disable-ddr");
+	slot->non_removable = ofnode_read_bool(node, "non-removable");
+	slot->cmd_clk_skew = ofnode_read_u32_default(node,
+						     "cavium,cmd-clk-skew", 0);
+	slot->dat_clk_skew = ofnode_read_u32_default(node,
+						     "cavium,dat-clk-skew", 0);
+	debug("%s(%s): host caps: 0x%x\n", __func__,
+	      dev->name, slot->cfg.host_caps);
+	return 0;
+}
+
+/**
+ * Probes a MMC slot
+ *
+ * @param	dev	mmc device
+ *
+ * @return	0 for success, error otherwise
+ */
+static int octeontx_mmc_slot_probe(struct udevice *dev)
+{
+	struct octeontx_mmc_slot *slot;
+	struct mmc *mmc;
+	int err;
+
+	debug("%s(%s)\n", __func__, dev->name);
+	if (!host_probed) {
+		pr_err("%s(%s): Error: host not probed yet\n",
+		       __func__, dev->name);
+	}
+	slot = dev_to_mmc_slot(dev);
+	mmc = &slot->mmc;
+	mmc->dev = dev;
+
+	slot->valid = false;
+	if (!octeontx_mmc_get_valid(dev)) {
+		debug("%s(%s): slot is invalid\n", __func__, dev->name);
+		return -ENODEV;
+	}
+
+	debug("%s(%s): Getting config\n", __func__, dev->name);
+	err = octeontx_mmc_get_config(dev);
+	if (err) {
+		pr_err("probe(%s): Error getting config\n", dev->name);
+		return err;
+	}
+
+	debug("%s(%s): mmc bind, mmc: %p\n", __func__, dev->name, &slot->mmc);
+	err = mmc_bind(dev, &slot->mmc, &slot->cfg);
+	if (err) {
+		pr_err("%s(%s): Error binding mmc\n", __func__, dev->name);
+		return -1;
+	}
+
+	/* For some reason, mmc_bind always assigns priv to the device */
+	slot->mmc.priv = slot;
+
+	debug("%s(%s): lowlevel init\n", __func__, dev->name);
+	err = octeontx_mmc_init_lowlevel(mmc);
+	if (err) {
+		pr_err("probe(%s): Low-level init failed\n", dev->name);
+		return err;
+	}
+
+	slot->valid = true;
+
+	if (!err) {
+		/* Initialize each device regulator */
+		err = mmc_power_init(&slot->mmc);
+		debug("%s: Initialized %s\n", __func__, dev->name);
+		/* Turn off all I/O supplies */
+		if (!err && slot->mmc.vqmmc_supply)
+			regulator_set_enable(slot->mmc.vqmmc_supply, false);
+	}
+
+	debug("%s(%s):\n"
+	      "  base address : %p\n"
+	      "  bus id       : %d\n", __func__, dev->name,
+		slot->base_addr, slot->bus_id);
+
+	return err;
+}
+
+/**
+ * MMC slot driver operations
+ */
+static const struct dm_mmc_ops octeontx_hsmmc_ops = {
+	.send_cmd = octeontx_mmc_dev_send_cmd,
+	.set_ios = octeontx_mmc_set_ios,
+	.get_cd = octeontx_mmc_get_cd,
+	.get_wp = octeontx_mmc_get_wp,
+#ifdef MMC_SUPPORTS_TUNING
+	.execute_tuning = octeontx_mmc_execute_tuning,
+#endif
+};
+
+static const struct udevice_id octeontx_hsmmc_ids[] = {
+	{ .compatible = "mmc-slot" },
+	{ }
+};
+
+U_BOOT_DRIVER(octeontx_hsmmc_slot) = {
+	.name	= "octeontx_hsmmc_slot",
+	.id	= UCLASS_MMC,
+	.of_match = of_match_ptr(octeontx_hsmmc_ids),
+	.probe = octeontx_mmc_slot_probe,
+	.ops = &octeontx_hsmmc_ops,
+};
+
+/*****************************************************************
+ * PCI host driver
+ *
+ * The PCI host driver contains the resources used by all of the
+ * slot drivers.
+ *
+ * The slot drivers are pseudo drivers.
+ */
+
+/**
+ * Probe the MMC host controller
+ *
+ * @param	dev	mmc host controller device
+ *
+ * @return	0 for success, -1 on error
+ */
+static int octeontx_mmc_host_probe(struct udevice *dev)
+{
+	pci_dev_t bdf = dm_pci_get_bdf(dev);
+	struct octeontx_mmc_host *host = dev_get_priv(dev);
+	union mio_emm_int emm_int;
+	u8 rev;
+
+	debug("%s(%s): Entry host: %p\n", __func__, dev->name, host);
+
+	if (!octeontx_mmc_get_valid(dev)) {
+		debug("%s(%s): mmc host not valid\n", __func__, dev->name);
+		return -ENODEV;
+	}
+	memset(host, 0, sizeof(*host));
+	host->base_addr = dm_pci_map_bar(dev, PCI_BASE_ADDRESS_0,
+					 PCI_REGION_MEM);
+	if (!host->base_addr) {
+		pr_err("%s: Error: MMC base address not found\n", __func__);
+		return -1;
+	}
+	host->dev = dev;
+	debug("%s(%s): Base address: %p\n", __func__, dev->name,
+	      host->base_addr);
+	if (!dev_has_of_node(dev)) {
+		pr_err("%s: No device tree information found\n", __func__);
+		return -1;
+	}
+	host->node = dev->node;
+	dev->req_seq = PCI_FUNC(bdf);
+	host->last_slotid = -1;
+	if (otx_is_platform(PLATFORM_ASIM))
+		host->is_asim = true;
+	if (otx_is_platform(PLATFORM_EMULATOR))
+		host->is_emul = true;
+	host->dma_wait_delay =
+		ofnode_read_u32_default(dev->node, "marvell,dma-wait-delay", 1);
+	/* Force reset of eMMC */
+	writeq(0, host->base_addr + MIO_EMM_CFG());
+	debug("%s: Clearing MIO_EMM_CFG\n", __func__);
+	udelay(100);
+	emm_int.u = readq(host->base_addr + MIO_EMM_INT());
+	debug("%s: Writing 0x%llx to MIO_EMM_INT\n", __func__, emm_int.u);
+	writeq(emm_int.u, host->base_addr + MIO_EMM_INT());
+
+	debug("%s(%s): Getting I/O clock\n", __func__, dev->name);
+	host->sys_freq = octeontx_get_io_clock();
+	debug("%s(%s): I/O clock %llu\n", __func__, dev->name, host->sys_freq);
+
+	if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+		/* Flags for issues to work around */
+		dm_pci_read_config8(dev, PCI_REVISION_ID, &rev);
+		if (otx_is_soc(CN96XX)) {
+			debug("%s: CN96XX revision %d\n", __func__, rev);
+			switch (rev) {
+			case 0:
+				host->calibrate_glitch = true;
+				host->cond_clock_glitch = true;
+				break;
+			case 1:
+				break;
+			case 2:
+				break;
+			case 0x10:	/* C0 */
+				host->hs400_skew_needed = true;
+				debug("HS400 skew support enabled\n");
+				fallthrough;
+			default:
+				debug("CN96XX rev C0+ detected\n");
+				host->tap_requires_noclk = true;
+				break;
+			}
+		} else if (otx_is_soc(CN95XX)) {
+			if (!rev)
+				host->cond_clock_glitch = true;
+		}
+	}
+
+	host_probed = true;
+
+	return 0;
+}
+
+/**
+ * This performs some initial setup before a probe occurs.
+ *
+ * @param dev:	MMC slot device
+ *
+ * @return 0 for success, -1 on failure
+ *
+ * Do some pre-initialization before probing a slot.
+ */
+static int octeontx_mmc_host_child_pre_probe(struct udevice *dev)
+{
+	struct octeontx_mmc_host *host = dev_get_priv(dev_get_parent(dev));
+	struct octeontx_mmc_slot *slot;
+	struct mmc_uclass_priv *upriv;
+	ofnode node = dev->node;
+	u32 bus_id;
+	char name[16];
+	int err;
+
+	debug("%s(%s) Pre-Probe\n", __func__, dev->name);
+	if (ofnode_read_u32(node, "reg", &bus_id)) {
+		pr_err("%s(%s): Error: \"reg\" not found in device tree\n",
+		       __func__, dev->name);
+		return -1;
+	}
+	if (bus_id > OCTEONTX_MAX_MMC_SLOT) {
+		pr_err("%s(%s): Error: \"reg\" out of range of 0..%d\n",
+		       __func__, dev->name, OCTEONTX_MAX_MMC_SLOT);
+		return -1;
+	}
+
+	slot = &host->slots[bus_id];
+	dev->priv = slot;
+	slot->host = host;
+	slot->bus_id = bus_id;
+	slot->dev = dev;
+	slot->base_addr = host->base_addr;
+	slot->is_asim = host->is_asim;
+	slot->is_emul = host->is_emul;
+
+	snprintf(name, sizeof(name), "octeontx-mmc%d", bus_id);
+	err = device_set_name(dev, name);
+
+	if (!dev->uclass_priv) {
+		debug("%s(%s): Allocating uclass priv\n", __func__,
+		      dev->name);
+		upriv = calloc(1, sizeof(struct mmc_uclass_priv));
+		if (!upriv)
+			return -ENOMEM;
+		dev->uclass_priv = upriv;
+		dev->uclass->priv = upriv;
+	} else {
+		upriv = dev->uclass_priv;
+	}
+
+	upriv->mmc = &slot->mmc;
+	debug("%s: uclass priv: %p, mmc: %p\n", dev->name, upriv, upriv->mmc);
+
+	debug("%s: ret: %d\n", __func__, err);
+	return err;
+}
+
+static const struct udevice_id octeontx_hsmmc_host_ids[] = {
+	{ .compatible = "cavium,thunder-8890-mmc" },
+	{ }
+};
+
+U_BOOT_DRIVER(octeontx_hsmmc_host) = {
+	.name	= "octeontx_hsmmc_host",
+	.id	= UCLASS_MISC,
+	.of_match = of_match_ptr(octeontx_hsmmc_host_ids),
+	.probe	= octeontx_mmc_host_probe,
+	.priv_auto_alloc_size = sizeof(struct octeontx_mmc_host),
+	.child_pre_probe = octeontx_mmc_host_child_pre_probe,
+	.flags	= DM_FLAG_PRE_RELOC,
+};
+
+static struct pci_device_id octeontx_mmc_supported[] = {
+	{ PCI_VDEVICE(CAVIUM, PCI_DEVICE_ID_CAVIUM_EMMC) },
+	{ },
+};
+
+U_BOOT_PCI_DEVICE(octeontx_hsmmc_host, octeontx_mmc_supported);
diff --git a/drivers/mmc/octeontx_hsmmc.h b/drivers/mmc/octeontx_hsmmc.h
new file mode 100644
index 0000000000..70844b1cba
--- /dev/null
+++ b/drivers/mmc/octeontx_hsmmc.h
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier:    GPL-2.0
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * https://spdx.org/licenses
+ */
+#ifndef __OCTEONTX_HSMMC_H__
+#define __OCTEONTX_HSMMC_H__
+#include <asm/gpio.h>
+
+/** Name of our driver */
+#define OCTEONTX_MMC_DRIVER_NAME	"octeontx-hsmmc"
+
+/** Maximum supported MMC slots */
+#define OCTEONTX_MAX_MMC_SLOT		3
+
+#define POWER_ON_TIME			40 /** See SD 4.1 spec figure 6-5 */
+
+/**
+ * Timeout used when waiting for commands to complete.  We need to keep this
+ * above the hardware watchdog timeout which is usually limited to 1000ms
+ */
+#define WATCHDOG_COUNT			(1100)	/* in msecs */
+
+/**
+ * Long timeout for commands which might take a while to complete.
+ */
+#define MMC_TIMEOUT_LONG		1000
+
+/**
+ * Short timeout used for most commands in msecs
+ */
+#define MMC_TIMEOUT_SHORT		20
+
+#define NSEC_PER_SEC			1000000000L
+
+#define MAX_NO_OF_TAPS			64
+
+#define EXT_CSD_POWER_CLASS		187	/* R/W */
+
+/* default HS400 tuning block number */
+#define DEFAULT_HS400_TUNING_BLOCK	1
+
+struct octeontx_mmc_host;
+
+/** MMC/SD slot data structure */
+struct octeontx_mmc_slot {
+	struct mmc		mmc;
+	struct mmc_config	cfg;
+	struct octeontx_mmc_host *host;
+	struct udevice		*dev;
+	void			*base_addr;	/** Same as host base_addr */
+	u64			clock;
+	int			bus_id;		/** slot number */
+	uint			bus_width;
+	uint			max_width;
+	int			hs200_tap_adj;
+	int			hs400_tap_adj;
+	int			hs400_tuning_block;
+	struct gpio_desc	cd_gpio;
+	struct gpio_desc	wp_gpio;
+	struct gpio_desc	power_gpio;
+	enum bus_mode		mode;
+	union mio_emm_switch	cached_switch;
+	union mio_emm_switch	want_switch;
+	union mio_emm_rca	cached_rca;
+	union mio_emm_timing	taps;	/* otx2: MIO_EMM_TIMING */
+	union mio_emm_timing	hs200_taps;
+	union mio_emm_timing	hs400_taps;
+	/* These are used to see if our tuning is still valid or not */
+	enum bus_mode		last_mode;
+	u32			last_clock;
+	u32			block_len;
+	u32			block_count;
+	int			cmd_clk_skew;
+	int			dat_clk_skew;
+	uint			cmd_cnt;	/* otx: sample cmd in delay */
+	uint			dat_cnt;	/* otx: sample data in delay */
+	uint			drive;		/* Current drive */
+	uint			slew;		/* clock skew */
+	uint			cmd_out_hs200_delay;
+	uint			data_out_hs200_delay;
+	uint			cmd_out_hs400_delay;
+	uint			data_out_hs400_delay;
+	uint			clk_period;
+	bool			valid:1;
+	bool			is_acmd:1;
+	bool			tuned:1;
+	bool			hs200_tuned:1;
+	bool			hs400_tuned:1;
+	bool			is_1_8v:1;
+	bool			is_3_3v:1;
+	bool			is_ddr:1;
+	bool			is_asim:1;
+	bool			is_emul:1;
+	bool			cd_inverted:1;
+	bool			wp_inverted:1;
+	bool			disable_ddr:1;
+	bool			non_removable:1;
+};
+
+struct octeontx_mmc_cr_mods {
+	u8 ctype_xor;
+	u8 rtype_xor;
+};
+
+struct octeontx_mmc_cr {
+	u8 c;
+	u8 r;
+};
+
+struct octeontx_sd_mods {
+	struct octeontx_mmc_cr mmc;
+	struct octeontx_mmc_cr sd;
+	struct octeontx_mmc_cr sdacmd;
+};
+
+/** Host controller data structure */
+struct octeontx_mmc_host {
+	struct		udevice *dev;
+	void		*base_addr;
+	struct octeontx_mmc_slot slots[OCTEONTX_MAX_MMC_SLOT + 1];
+	pci_dev_t	pdev;
+	u64		sys_freq;
+	union mio_emm_cfg emm_cfg;
+	u64		timing_taps;
+	struct mmc	*last_mmc;	/** Last mmc used */
+	ofnode		node;
+	int		cur_slotid;
+	int		last_slotid;
+	int		max_width;
+	uint		per_tap_delay;
+	uint		num_slots;
+	uint		dma_wait_delay;	/* Delay before polling DMA in usecs */
+	bool		initialized:1;
+	bool		timing_calibrated:1;
+	bool		is_asim:1;
+	bool		is_emul:1;
+	bool		calibrate_glitch:1;
+	bool		cond_clock_glitch:1;
+	bool		tap_requires_noclk:1;
+	bool		hs400_skew_needed:1;
+};
+
+/*
+ * NOTE: This was copied from the Linux kernel.
+ *
+ * MMC status in R1, for native mode (SPI bits are different)
+ * Type
+ *	e:error bit
+ *	s:status bit
+ *	r:detected and set for the actual command response
+ *	x:detected and set during command execution. the host must poll
+ *	    the card by sending status command in order to read these bits.
+ * Clear condition
+ *	a:according to the card state
+ *	b:always related to the previous command. Reception of
+ *	    a valid command will clear it (with a delay of one command)
+ *	c:clear by read
+ */
+#define R1_OUT_OF_RANGE		BIT(31)		/* er, c */
+#define R1_ADDRESS_ERROR	BIT(30)		/* erx, c */
+#define R1_BLOCK_LEN_ERROR	BIT(29)		/* er, c */
+#define R1_ERASE_SEQ_ERROR	BIT(28)		/* er, c */
+#define R1_ERASE_PARAM          BIT(27)		/* ex, c */
+#define R1_WP_VIOLATION		BIT(26)		/* erx, c */
+#define R1_CARD_IS_LOCKED	BIT(25)		/* sx, a */
+#define R1_LOCK_UNLOCK_FAILED	BIT(24)		/* erx, c */
+#define R1_COM_CRC_ERROR	BIT(23)		/* er, b */
+/*#define R1_ILLEGAL_COMMAND	BIT(22)*/		/* er, b */
+#define R1_CARD_ECC_FAILED	BIT(21)		/* ex, c */
+#define R1_CC_ERROR		BIT(20)		/* erx, c */
+#define R1_ERROR		BIT(19)		/* erx, c */
+#define R1_UNDERRUN		BIT(18)		/* ex, c */
+#define R1_OVERRUN		BIT(17)		/* ex, c */
+#define R1_CID_CSD_OVERWRITE	BIT(16)		/* erx, c, CID/CSD overwrite */
+#define R1_WP_ERASE_SKIP	BIT(15)		/* sx, c */
+#define R1_CARD_ECC_DISABLED	BIT(14)		/* sx, a */
+#define R1_ERASE_RESET		BIT(13)		/* sr, c */
+#define R1_STATUS(x)		((x) & 0xFFFFE000)
+#define R1_CURRENT_STATE(x)	(((x) & 0x00001E00) >> 9) /* sx, b (4 bits) */
+#define R1_READY_FOR_DATA	BIT(8)		/* sx, a */
+#define R1_SWITCH_ERROR		BIT(7)		/* sx, c */
+
+#define R1_BLOCK_READ_MASK	R1_OUT_OF_RANGE |	\
+				R1_ADDRESS_ERROR |	\
+				R1_BLOCK_LEN_ERROR |	\
+				R1_CARD_IS_LOCKED |	\
+				R1_COM_CRC_ERROR |	\
+				R1_ILLEGAL_COMMAND |	\
+				R1_CARD_ECC_FAILED |	\
+				R1_CC_ERROR |		\
+				R1_ERROR
+#define R1_BLOCK_WRITE_MASK	R1_OUT_OF_RANGE |	\
+				R1_ADDRESS_ERROR |	\
+				R1_BLOCK_LEN_ERROR |	\
+				R1_WP_VIOLATION |	\
+				R1_CARD_IS_LOCKED |	\
+				R1_COM_CRC_ERROR |	\
+				R1_ILLEGAL_COMMAND |	\
+				R1_CARD_ECC_FAILED |	\
+				R1_CC_ERROR |		\
+				R1_ERROR |		\
+				R1_UNDERRUN |		\
+				R1_OVERRUN
+
+#endif /* __OCTEONTX_HSMMC_H__ */
-- 
2.27.0

  parent reply	other threads:[~2020-07-24 10:08 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-24 10:08 [PATCH v1 00/24] arm: Introduce Marvell/Cavium OcteonTX/TX2 Stefan Roese
2020-07-24 10:08 ` [PATCH v1 01/24] fdtdec: Add API to read pci bus-range property Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-30 15:09     ` Stefan Roese
2020-07-24 10:08 ` [PATCH v1 02/24] pci: pci-uclass: Remove #ifdef CONFIG_NR_DRAM_BANKS as its always set Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 03/24] pci: pci-uclass: Dynamically allocate the PCI regions Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-30 15:16     ` Stefan Roese
2020-08-05  9:12       ` Stefan Roese
2020-07-24 10:08 ` [PATCH v1 04/24] pci: pci-uclass: Fix incorrect argument in map_sysmem Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 05/24] pci: pci-uclass: Make DT subnode parse optional Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 06/24] pci: pci-uclass: Add multi entry support for memory regions Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-30 15:35     ` Stefan Roese
2020-07-31 18:44       ` Simon Glass
2020-08-04 14:03         ` Stefan Roese
2020-08-04 15:05           ` Simon Glass
2020-08-14 11:40             ` Stefan Roese
2020-08-22 15:09               ` Simon Glass
2020-08-23  9:41                 ` Stefan Roese
2020-08-23 14:03                   ` Tom Rini
2020-08-24  7:36                     ` Stefan Roese
2020-08-24 13:09                       ` Tom Rini
2020-08-25 15:04                         ` Simon Glass
2020-08-25 15:09                           ` Tom Rini
2020-07-24 10:08 ` [PATCH v1 07/24] pci: pci-uclass: Add support for Enhanced Allocation in Bridges Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 08/24] pci: pci-uclass: Add support for Single-Root I/O Virtualization Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 09/24] pci: pci-uclass: Add VF BAR map support for Enhanced Allocation Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 10/24] pci: pci-uclass: Add support for Alternate-RoutingID capability Stefan Roese
2020-07-24 10:08 ` [PATCH v1 11/24] pci: pci-uclass: Check validity of ofnode Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 12/24] arm: include/asm/io.h: Add 64bit clrbits and setbits helpers Stefan Roese
2020-07-24 10:08 ` [PATCH v1 13/24] arm: octeontx: Add headers for OcteonTX Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-31 14:21     ` Stefan Roese
2020-07-31 18:44       ` Simon Glass
2020-07-24 10:08 ` [PATCH v1 14/24] arm: octeontx2: Add headers for OcteonTX2 Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-31 14:23     ` Stefan Roese
2020-07-24 10:08 ` [PATCH v1 15/24] ata: ahci: Add BAR index quirk for Cavium PCI SATA device Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-30 15:41     ` Stefan Roese
2020-07-31 18:35       ` Simon Glass
2020-08-04 13:37         ` Stefan Roese
2020-07-24 10:08 ` [PATCH v1 16/24] pci: Add PCI controller driver for OcteonTX / TX2 Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-30 16:25     ` Stefan Roese
2020-07-31 18:44       ` Simon Glass
2020-08-05 13:25         ` Stefan Roese
2020-07-24 10:08 ` [PATCH v1 17/24] mmc: Remove static qualifier on mmc_power_init Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-24 10:08 ` Stefan Roese [this message]
2020-07-24 10:08 ` [PATCH v1 19/24] mtd: nand: Add NAND controller driver for OcteonTX Stefan Roese
2020-07-24 10:08 ` [PATCH v1 20/24] net: Add NIC " Stefan Roese
2020-07-24 10:08 ` [PATCH v1 21/24] net: Add NIC controller driver for OcteonTX2 Stefan Roese
2020-07-24 10:08 ` [PATCH v1 22/24] watchdog: Add reset support for OcteonTX / TX2 Stefan Roese
2020-07-28 19:01   ` Simon Glass
2020-07-31 14:25     ` Stefan Roese
2020-08-05 13:47       ` Stefan Roese
2020-07-24 10:08 ` [PATCH v1 23/24] arm: octeontx: Add support for OcteonTX SoC platforms Stefan Roese
2020-07-24 10:08 ` [PATCH v1 24/24] arm: octeontx2: Add support for OcteonTX2 " Stefan Roese

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200724100856.1482324-19-sr@denx.de \
    --to=sr@denx.de \
    --cc=u-boot@lists.denx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.