All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField SoC
@ 2018-05-25 16:06 ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1265 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |  112 ++++
 6 files changed, 1402 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..a3303d1
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1265 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Several utility macros to get/set the register fields. */
+#define TMFIFO_GET_FIELD(reg, field) \
+	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
+
+#define TMFIFO_SET_FIELD(reg, field, value) ({ \
+	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
+	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
+})
+
+#define TMFIFO_RX_GET_STS_CNT(sts) \
+	TMFIFO_GET_FIELD(sts, TMFIFO_RX_STS__COUNT)
+
+#define TMFIFO_TX_GET_STS_CNT(sts) \
+	TMFIFO_GET_FIELD(sts, TMFIFO_TX_STS__COUNT)
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/*
+ * Allocate vrings for the fifo.
+ *
+ * Allocates one DMA-coherent ring buffer per vring of @tm_vdev. On
+ * failure, any rings already allocated in this call are released again,
+ * since the caller's error path only frees @tm_vdev itself. Returns 0 on
+ * success or -ENOMEM when an allocation fails.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			goto err_unwind;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+
+err_unwind:
+	/* Unwind the rings allocated before the failing one. */
+	while (--i >= 0) {
+		vring = &tm_vdev->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+				  vring->va, vring->dma);
+		vring->va = NULL;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Free vrings of the fifo device.
+ *
+ * The virtqueue is deleted *before* the ring memory it was created on is
+ * returned to the DMA pool, so no virtqueue ever points at freed memory
+ * (the original code freed the memory first).
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		if (vring->va) {
+			size = PAGE_ALIGN(vring_size(vring->size,
+						     vring->align));
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			/* Clear the slot first so the irq is freed once. */
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			/* dev_id must match request time: (u8 *)fifo + i. */
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ *
+ * Runs in process context and drains whichever direction has its pending
+ * event bit set. fifo->lock serializes this against vdev creation and
+ * deletion (tmfifo_create_vdev() / tmfifo_delete_vdev()).
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	/* Nothing to do until probe has finished initialization. */
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Each irq is requested with dev_id == (u8 *)fifo + i (see
+ * tmfifo_free_irqs()), so the irq index and the fifo pointer are both
+ * recovered from dev_id.
+ * NOTE(review): this relies on the fifo allocation being at least
+ * sizeof(void *) aligned and TM_IRQ_CNT <= sizeof(void *) -- confirm.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Defer the actual Rx/Tx work to the workqueue. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Nothing to do for now: the vdev memory is freed explicitly in
+ * tmfifo_delete_vdev(), so the release callback is a stub.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when
+ * nothing is pending or the avail ring entry is corrupted. A bad head
+ * index warns and bails out instead of BUG_ON(): the avail ring is shared
+ * with the external host side, so corruption there must not crash the SoC.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Post a completed descriptor chain to the used ring and publish idx. */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	/*
+	 * NOTE(review): 'used->ring[idx].id' and 'used->idx' are stored
+	 * without cpu_to_virtio16/32 conversion while 'len' is converted;
+	 * this is only consistent on little-endian -- confirm.
+	 */
+	vr->used->idx++;
+}
+
+/*
+ * Get the total length of a descriptor chain.
+ * Walks the chain via VRING_DESC_F_NEXT links, summing each desc's len.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release a packet back to the used ring.
+ *
+ * If *desc and the saved desc_head identify an in-flight packet, complete
+ * it with the recorded pkt_len; otherwise pull the next chain from the
+ * avail ring and release it with its computed length. *desc is cleared
+ * and vring->pkt_len reset in all cases.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		/* Partially processed packet: use the saved head/length. */
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. Runs in timer context; defers work to the wq. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	/* Re-arm: the timer runs for the lifetime of the device. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output.
+ *
+ * Copies each pending Tx descriptor chain into the console's circular
+ * tx_buf, prefixed with a tmfifo message header, then releases the
+ * descriptors back to the used ring. Called from tmfifo_virtio_notify()
+ * with tmfifo_spin_lock held, so it must not sleep.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/*
+		 * NOTE(review): the 8-byte header is written at tx_tail
+		 * without wrap handling; this relies on every push being a
+		 * multiple of 8 (header + padded packet) so tx_tail stays
+		 * 8-aligned at packet boundaries -- confirm.
+		 */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				/* Copy in two pieces across the wrap point. */
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * is_rx == true drains the Rx FIFO into posted Rx descriptors; is_rx ==
+ * false pushes Tx descriptors (or the buffered console output) into the
+ * Tx FIFO. Data moves eight bytes (one FIFO word) at a time, with a
+ * message header written/parsed at each packet boundary. Per-vring state
+ * (desc, desc_head, cur_len, rem_len, pkt_len) lets a partially
+ * transferred packet resume on the next invocation, and fifo->vring[]
+ * pins the direction to one vring until its packet completes.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_RX_GET_STS_CNT(sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_TX_GET_STS_CNT(sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/*
+				 * NOTE(review): writeq() already converts to
+				 * bus (little) endianness on most arches, so
+				 * the explicit cpu_to_le64() looks like a
+				 * double swap on big-endian -- confirm.
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					/* Unknown type: drop the header word. */
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			/*
+			 * NOTE(review): readq() already converts from bus
+			 * endianness; the extra le64_to_cpu() mirrors the Tx
+			 * path's cpu_to_le64() concern -- confirm.
+			 */
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ * Always returns true; the actual transfer is done asynchronously in the
+ * work handler, except for console Tx which is buffered immediately.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device (config ops callback). */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use (accepts whatever the core negotiated). */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		/* Clear the pointer before deleting to avoid reuse. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ * The ring memory itself was allocated in tmfifo_alloc_vrings(); here
+ * each vring is zeroed and wrapped in a virtqueue. Returns 0 on success,
+ * -EINVAL for bad arguments or -ENOMEM if a virtqueue cannot be created;
+ * partially created vqs are torn down on failure.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte (config ops callback). */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte (config ops callback). */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now: just clears the status byte. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Bounds check; 'offset + len < len' catches unsigned overflow. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Bounds check; 'offset + len < len' catches unsigned overflow. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was "virtio_get": fix the copy-pasted error message. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Virtio config operations.
+ * Dispatch table shared by every virtio device created on top of the
+ * TmFifo (see tmfifo_create_vdev()).
+ */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * Allocates the per-device state and vrings, sets up the console Tx
+ * buffer when needed, and registers the new virtio device. Returns 0 on
+ * success, or a negative errno (-EEXIST, -ENOMEM, or the error from
+ * register_virtio_device()). The console tx_buf allocation is now
+ * checked, and tx_buf is freed on the registration error path.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* The console routes its output through an intermediate buffer. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+	kfree(tm_vdev->tx_buf);
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. Safe to call for non-existent ids. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);	/* console Tx buffer; may be NULL */
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function.
+ * Tears down in the reverse order of probe: timer, irqs, pending work,
+ * virtio devices, register mappings, then the mem-region reservations.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	/* Stop the work handler from doing anything further. */
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable. 'mac' is left untouched when the variable is absent,
+ * malformed, or EFI runtime services are unavailable.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/* efi.get_variable is only valid when runtime services exist;
+	 * calling it on a non-EFI boot would dereference a NULL pointer.
+	 */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO: map the Rx/Tx FIFO registers, request irqs, program
+ * the watermarks and create the console and network virtio devices.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/* Don't go through tmfifo_remove() here: it would release
+		 * mem regions that were never requested.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs the callback; the old manual
+	 * 'fifo->timer.function = tmfifo_timer' assignment was redundant.
+	 */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Unable to get irq %d\n", i);
+			goto err;
+		}
+		fifo->irq[i] = ret;
+		/* The irq index is encoded into dev_id so the shared
+		 * handler can recover it (see tmfifo_irq_handler()).
+		 */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/* 'ret' is 0 after a successful request_irq(); it must hold an
+	 * error code again so a failed ioremap doesn't return success.
+	 */
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM, fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver glue: the device can bind via either DT or ACPI. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: set up the global lock and register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int rc;
+
+	mutex_init(&tmfifo_lock);
+
+	rc = platform_driver_register(&tmfifo_driver);
+	if (rc)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return rc;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_AUTHOR("Mellanox Technologies, Ltd");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.7");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..b07f353
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#ifdef __tile__
+#define _64bit(x) x ## UL
+#else /* __tile__ */
+#define _64bit(x) x ## ULL
+#endif /* __tile__ */
+#endif /* __ASSEMBLER */
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#ifndef __DOXYGEN__
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#endif /* !defined(__DOXYGEN__) */
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-05-25 16:06 ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC over USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1265 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |  112 ++++
 6 files changed, 1402 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..a3303d1
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1265 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Several utility macros to get/set the register fields. */
+
+/* Extract field 'field' (named by its _SHIFT/_WIDTH defines) from 'reg'. */
+#define TMFIFO_GET_FIELD(reg, field) \
+	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
+
+/* Return 'reg' with field 'field' replaced by 'value'.
+ * NOTE(review): uses 1UL shifts, so it assumes a 64-bit 'unsigned long'
+ * (true on arm64) for fields with shift >= 32 — confirm if ever reused.
+ */
+#define TMFIFO_SET_FIELD(reg, field, value) ({ \
+	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
+	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
+})
+
+/* Number of words currently in the Rx FIFO, from its status register. */
+#define TMFIFO_RX_GET_STS_CNT(sts) \
+	TMFIFO_GET_FIELD(sts, TMFIFO_RX_STS__COUNT)
+
+/* Number of words currently in the Tx FIFO, from its status register. */
+#define TMFIFO_TX_GET_STS_CNT(sts) \
+	TMFIFO_GET_FIELD(sts, TMFIFO_TX_STS__COUNT)
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* Free space in the console Tx circular buffer. 8 bytes are always held
+ * back, presumably so the buffer never becomes completely full
+ * (head == tail is reserved to mean "empty") and writes stay 8-byte
+ * granular — TODO confirm.
+ */
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+/* Advance the producer (tail) index by 'len', wrapping at buffer size. */
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* Advance the consumer (head) index by 'len', wrapping at buffer size. */
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate DMA-coherent ring memory for all vrings of 'tm_vdev'.
+ * On failure, any rings already allocated are freed again so the caller
+ * cannot leak them; returns -ENOMEM in that case.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va)
+			goto err_unwind;
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+
+err_unwind:
+	dev_err(tm_vdev->vdev.dev.parent, "vring allocation failed\n");
+	/* Free the rings allocated before the failure. */
+	while (--i >= 0) {
+		vring = &tm_vdev->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+				  vring->va, vring->dma);
+		vring->va = NULL;
+	}
+	return -ENOMEM;
+}
+
+/* Free the virtqueues and ring memory of the given vdev.
+ * The vdev entry fifo->vdev[vdev_id] must still be valid when called.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		if (!vring->va)
+			continue;
+
+		/* Delete the virtqueue before freeing the ring memory it
+		 * is built on (the previous order freed the pages while
+		 * the virtqueue still referenced them).
+		 */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+				  vring->va, vring->dma);
+		vring->va = NULL;
+	}
+}
+
+/* Tear down every FIFO interrupt that was successfully requested. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int idx;
+
+	for (idx = 0; idx < TM_IRQ_CNT; idx++) {
+		int irq = fifo->irq[idx];
+
+		if (!irq)
+			continue;
+		fifo->irq[idx] = 0;
+		disable_irq(irq);
+		/* dev_id must match the cookie passed to request_irq(). */
+		free_irq(irq, (u8 *)fifo + idx);
+	}
+}
+
+/* Deferred handler: drain the Tx and Rx events latched by the interrupt
+ * handler or the house-keeping timer.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+	struct tmfifo_vdev *dev;
+	int id;
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx: push pending virtio buffers into the FIFO. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+	    fifo->irq[TM_TX_LWM_IRQ]) {
+		for (id = 0; id < TMFIFO_VDEV_MAX; id++) {
+			dev = fifo->vdev[id];
+			if (!dev)
+				continue;
+			tmfifo_virtio_rxtx(dev->vrings[TMFIFO_VRING_TX].vq,
+					   false);
+		}
+	}
+
+	/* Rx: pull data from the FIFO into virtio buffers. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+	    fifo->irq[TM_RX_HWM_IRQ]) {
+		for (id = 0; id < TMFIFO_VDEV_MAX; id++) {
+			dev = fifo->vdev[id];
+			if (!dev)
+				continue;
+			tmfifo_virtio_rxtx(dev->vrings[TMFIFO_VRING_RX].vq,
+					   true);
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler shared by all four FIFO irqs.
+ * dev_id was registered as (u8 *)fifo + irq_index (see probe), so the
+ * irq index is recovered from the pointer's low bits. This relies on
+ * 'fifo' being at least pointer-aligned (kzalloc guarantees that) and
+ * on TM_IRQ_CNT <= sizeof(void *).
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Only latch the event bit here; the real work is deferred. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Empty release callback for the virtio devices: their memory is owned
+ * and freed by tmfifo_delete_vdev(), but the device core still requires
+ * a release callback to be present.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the head descriptor of the next available chain from the vring, or
+ * NULL when the driver side has posted nothing new. Consumes one avail
+ * ring entry (next_avail is advanced).
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	/* No movement of the avail index means nothing was posted. */
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	/* A head index outside the ring indicates ring corruption. */
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Return a completed descriptor chain to the used ring so the virtio
+ * driver side can reclaim it. 'len' is the byte count recorded for the
+ * whole chain (bytes written for Rx, bytes consumed for Tx).
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Sum the byte lengths of every descriptor in a chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 total = 0;
+
+	while (desc) {
+		u16 flags;
+
+		total += virtio32_to_cpu(vdev, desc->len);
+		flags = virtio16_to_cpu(vdev, desc->flags);
+		if (!(flags & VRING_DESC_F_NEXT))
+			break;
+		/* Follow the chain link to the next descriptor. */
+		desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)];
+	}
+
+	return total;
+}
+
+/* Complete and release the current packet of 'vring'.
+ * When '*desc' and the saved desc_head are valid, the partially processed
+ * chain tracked in the vring state is released with its recorded length;
+ * otherwise the next posted chain (if any) is fetched and released with
+ * its full length. Clears *desc and the vring packet length either way.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		/* Release the chain currently being processed. */
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		/* No chain in flight; drain the next posted one, if any. */
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* Periodic house-keeping: kick the worker so Rx is polled and Tx retried
+ * even when an interrupt was missed, then re-arm the timer.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/* Poll Rx in case an interrupt was lost or bytes are stuck. */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+	/* Retry Tx in case virtio queued packets waiting for FIFO space. */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Copy console packets from the virtqueue into the circular Tx buffer.
+ * Each packet gets a tmfifo_msg_hdr prefix and is padded to 8 bytes so
+ * the FIFO writer can emit whole 64-bit words.
+ * NOTE(review): tx_tail is updated here while the FIFO writer pops
+ * tx_head under tmfifo_spin_lock — confirm the single-producer locking
+ * model holds for all callers.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Write the message header at the current tail. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy each descriptor of the chain, wrapping at the end
+		 * of the circular buffer when necessary.
+		 */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_RX_GET_STS_CNT(sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_TX_GET_STS_CNT(sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify (kick) callback, invoked by the virtio core when the driver
+ * has posted new buffers to a virtqueue. Defers actual FIFO service to
+ * the workqueue, except for console Tx which may need immediate service.
+ * Always returns true (the kick is considered delivered).
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			/* NOTE(review): braces wanted here per kernel style,
+			 * since the sibling branch is braced.
+			 */
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Return the feature bits advertised for this emulated device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->features;
+}
+
+/* Record the feature bits negotiated by the virtio core. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/* Tear down every virtqueue created by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Drop any packet still in flight on this ring. */
+		if (vring->desc)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		/* Detach and free the virtqueue itself. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Each queue is layered on a vring buffer preallocated elsewhere in this
+ * driver (vring->va / vring->size / vring->align); vq->priv links the new
+ * virtqueue back to its tmfifo_vring for the notify callback.
+ * Returns 0 on success or a negative errno; on failure any queues already
+ * created are torn down via tmfifo_virtio_del_vqs().
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* Start from a clean (zeroed) vring area. */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Cross-link vq and vring for the notify path. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the last device status byte written by the virtio core. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->status;
+}
+
+/* Store the device status byte on behalf of the virtio core. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = status;
+}
+
+/* Reset the device; only the status byte needs clearing for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field into @buf.
+ * Out-of-range requests are logged and rejected (no partial copy).
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* The second test catches unsigned wrap-around of offset + len. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field from @buf.
+ * Out-of-range requests are logged and rejected (no partial copy).
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+			      unsigned int offset,
+			      const void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* The second test catches unsigned wrap-around of offset + len. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was "virtio_get" — copy-paste error in the set path. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Virtio config operations wired into each emulated device, so the
+ * stock virtio console/net drivers can run on top of the TmFifo.
+ */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create a vdev type in a tmfifo.
+ *
+ * @fifo:     fifo the new device attaches to.
+ * @vdev_id:  virtio device id (VIRTIO_ID_NET or VIRTIO_ID_CONSOLE here).
+ * @features: feature bits advertised to the virtio driver.
+ * @config:   optional config blob copied into the vdev (may be NULL).
+ * @size:     length of @config in bytes.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		/* Fail the create if the console Tx buffer can't be had;
+		 * the original code ignored this allocation's result.
+		 */
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto alloc_buf_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	fifo->vdev[vdev_id] = NULL;
+	/* tx_buf is NULL for non-console vdevs; kfree(NULL) is a no-op. */
+	kfree(tm_vdev->tx_buf);
+alloc_buf_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo; a no-op if it was never created. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (!tm_vdev) {
+		mutex_unlock(&fifo->lock);
+		return 0;
+	}
+
+	/* Unregister vdev, then free everything it owned. */
+	unregister_virtio_device(&tm_vdev->vdev);
+	tmfifo_free_vrings(fifo, vdev_id);
+	kfree(tm_vdev->tx_buf);
+	kfree(tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function: tear down vdevs and release all resources.
+ * Also reached from the probe error path, so every step tolerates a
+ * partially initialized (or NULL) fifo.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	/* Cleared first — presumably gates the fast paths elsewhere in
+	 * this file so no new work is accepted; TODO confirm callers.
+	 */
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	/* Give back the mem regions requested in probe. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable. @mac is left untouched if the variable is missing, has the
+ * wrong size, or EFI runtime services are unavailable.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/*
+	 * efi.get_variable is only populated when EFI runtime services
+	 * are up; calling it unconditionally oopses on non-EFI boots.
+	 */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO: map the Rx/Tx register blocks, hook up interrupts,
+ * program the FIFO watermarks, then create the console and network
+ * virtio devices on top. On any failure after the mem regions are
+ * requested, tmfifo_remove() undoes whatever was set up.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/*
+		 * Nothing requested yet: must not reach the "err" path,
+		 * whose tmfifo_remove() would release unrequested regions.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs tmfifo_timer as the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		int irq = platform_get_irq(pdev, i);
+
+		/* Don't hand a negative errno to request_irq(). */
+		if (irq < 0) {
+			ret = irq;
+			goto err;
+		}
+		fifo->irq[i] = irq;
+		ret = request_irq(irq, tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base) {
+		/* ret may still be 0 from request_irq(); set it. */
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM, fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table ("mellanox,bf-tmfifo" compatible string). */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table: MLNXBF01 identifies the BlueField TmFifo. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver glue: bound via either the DT or the ACPI table. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: set up the global lock, then register the driver. */
+static int __init tmfifo_init(void)
+{
+	int rc;
+
+	mutex_init(&tmfifo_lock);
+
+	rc = platform_driver_register(&tmfifo_driver);
+	if (rc != 0)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return rc;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_AUTHOR("Mellanox Technologies, Ltd");
+/*
+ * "Dual BSD/GPL" matches the dual GPL-2.0/BSD license text at the top
+ * of this file; plain "GPL" would claim GPLv2-or-later.
+ */
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.7");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..b07f353
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+/*
+ * Kernel-private header: 64-bit register constants are always ULL here,
+ * so no assembler/userspace/tile portability scaffolding is needed.
+ */
+#define _64bit(x) x ## ULL
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-05-25 16:06 ` Liming Sun
@ 2018-05-25 16:06   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-05-25 16:06   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 16:06 ` Liming Sun
@ 2018-05-25 16:06   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods
  Cc: devicetree, Liming Sun, linux-arm-kernel

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..0a362f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,20 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed via USB or PCIe.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt numbers of Rx low water mark, Rx high water mark,
+		Tx low water mark, and Tx high water mark, respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-05-25 16:06   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..0a362f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,20 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed via USB or PCIe.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt numbers of Rx low water mark, Rx high water mark,
+		Tx low water mark, and Tx high water mark, respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-05-25 16:06   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2018-05-25 16:06   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek at lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-05-25 17:14   ` Robin Murphy
  -1 siblings, 0 replies; 179+ messages in thread
From: Robin Murphy @ 2018-05-25 17:14 UTC (permalink / raw)
  To: Liming Sun, Olof Johansson, Arnd Bergmann, David Woods
  Cc: devicetree, linux-arm-kernel

On 25/05/18 17:06, Liming Sun wrote:
[...]
> diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> new file mode 100644
> index 0000000..a3303d1
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo.c
> @@ -0,0 +1,1265 @@
> +// SPDX-License-Identifier: GPL-2.0

This tag doesn't match the included license text...

> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */

[...]
> +/* Several utility macros to get/set the register fields. */
> +#define TMFIFO_GET_FIELD(reg, field) \
> +	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
> +
> +#define TMFIFO_SET_FIELD(reg, field, value) ({ \
> +	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
> +	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
> +})

There's no need to reinvent <linux/bitfield.h>

[...]
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> +MODULE_AUTHOR("Mellanox Technologies, Ltd");
> +MODULE_LICENSE("GPL");

...and this implies yet another different license (since it indicates 
"GPLv2 or later")

> +MODULE_VERSION("0.7");
> diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
> new file mode 100644
> index 0000000..b07f353
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo_regs.h
> @@ -0,0 +1,112 @@
> +// SPDX-License-Identifier: GPL-2.0

Again, this doesn't match the included text. Also, the SPDX comment 
style is /* */ for headers.

> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */

[...]
> +#ifdef __ASSEMBLER__
> +#define _64bit(x) x
> +#else /* __ASSEMBLER__ */
> +#ifdef __tile__
> +#define _64bit(x) x ## UL
> +#else /* __tile__ */
> +#define _64bit(x) x ## ULL
> +#endif /* __tile__ */
> +#endif /* __ASSEMBLER */
> +
> +#ifdef __KERNEL__
> +#include <linux/types.h>
> +#else
> +#include <stdint.h>
> +#endif
> +
> +#ifndef __DOXYGEN__

Given that this is a private header under drivers/, is any of that lot 
necessary?

Robin.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-05-25 17:14   ` Robin Murphy
  0 siblings, 0 replies; 179+ messages in thread
From: Robin Murphy @ 2018-05-25 17:14 UTC (permalink / raw)
  To: linux-arm-kernel

On 25/05/18 17:06, Liming Sun wrote:
[...]
> diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> new file mode 100644
> index 0000000..a3303d1
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo.c
> @@ -0,0 +1,1265 @@
> +// SPDX-License-Identifier: GPL-2.0

This tag doesn't match the included license text...

> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */

[...]
> +/* Several utility macros to get/set the register fields. */
> +#define TMFIFO_GET_FIELD(reg, field) \
> +	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
> +
> +#define TMFIFO_SET_FIELD(reg, field, value) ({ \
> +	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
> +	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
> +})

There's no need to reinvent <linux/bitfield.h>

[...]
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> +MODULE_AUTHOR("Mellanox Technologies, Ltd");
> +MODULE_LICENSE("GPL");

...and this implies yet another different license (since it indicates 
"GPLv2 or later")

> +MODULE_VERSION("0.7");
> diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
> new file mode 100644
> index 0000000..b07f353
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo_regs.h
> @@ -0,0 +1,112 @@
> +// SPDX-License-Identifier: GPL-2.0

Again, this doesn't match the included text. Also, the SPDX comment 
style is /* */ for headers.

> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */

[...]
> +#ifdef __ASSEMBLER__
> +#define _64bit(x) x
> +#else /* __ASSEMBLER__ */
> +#ifdef __tile__
> +#define _64bit(x) x ## UL
> +#else /* __tile__ */
> +#define _64bit(x) x ## ULL
> +#endif /* __tile__ */
> +#endif /* __ASSEMBLER */
> +
> +#ifdef __KERNEL__
> +#include <linux/types.h>
> +#else
> +#include <stdint.h>
> +#endif
> +
> +#ifndef __DOXYGEN__

Given that this is a private header under drivers/, is any of that lot 
necessary?

Robin.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v2 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Extract a field from a register value using a bitfield.h-style mask. */
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+/*
+ * Replace a field in a register value. The arguments are fully
+ * parenthesized so the macro stays correct when callers pass compound
+ * expressions (e.g. "a | b") for reg or mask.
+ */
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	(((reg) & ~(mask)) | FIELD_PREP(mask, value))
+
+/* Default number of descriptors in each vring. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size (circular buffer, drained 8 bytes at a time). */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping (poll interval in jiffies). */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size, overridable at module load time (read-only in sysfs). */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/*
+ * Virtual devices sharing the TM FIFO, indexed by virtio device id
+ * (VIRTIO_ID_NET and VIRTIO_ID_CONSOLE are the only ids used).
+ */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock protecting the console tx_buf and vring_interrupt() calls. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+/* Per virtual-device state embedding the virtio_device. */
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/*
+ * Free space in the console circular Tx buffer. 8 bytes are kept in
+ * reserve, presumably so a full buffer can never alias an empty one
+ * (head == tail means empty) while writes stay 8-byte granular - the
+ * FIFO is drained one u64 word at a time.
+ */
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+/* Advance the producer (tail) index of the console Tx buffer, with wrap. */
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* Advance the consumer (head) index of the console Tx buffer, with wrap. */
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+/*
+ * 8-byte message header that precedes every packet on the FIFO wire.
+ * 'len' is the payload length in network (big-endian) byte order; the
+ * 'data' view lets the whole header be moved as one u64 FIFO word.
+ */
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/*
+ * Allocate the Rx/Tx vring buffers for one virtual device of the fifo.
+ *
+ * Returns 0 on success, -ENOMEM if a DMA-coherent allocation fails.
+ * NOTE(review): rings allocated before a failure are not unwound here;
+ * the caller is presumably expected to call tmfifo_free_vrings() on
+ * error - confirm.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			/* Allocation failure is -ENOMEM, not -EINVAL. */
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free the vrings of one virtual device.
+ *
+ * The virtqueue is deleted before the ring memory it references is
+ * returned, and the two teardown steps are independent: the original
+ * code only deleted the virtqueue when the buffer happened to be
+ * allocated, and freed the DMA memory while the virtqueue still
+ * pointed at it.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Delete the virtqueue first; it references vring->va. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		if (vring->va) {
+			size = PAGE_ALIGN(vring_size(vring->size,
+						     vring->align));
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/*
+ * Free interrupts of the fifo device.
+ *
+ * The dev_id cookie passed to free_irq() must match the one used at
+ * request time: the fifo base pointer plus the interrupt index (see
+ * tmfifo_irq_handler). fifo->irq[i] is cleared first so the work
+ * handler stops treating the source as active.
+ */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ *
+ * Scheduled from the interrupt handler, the house-keeping timer and
+ * the virtqueue notify callback. For each pending event bit it runs a
+ * Tx then an Rx pass over the vrings of every registered virtual
+ * device, serialized by fifo->lock. Nothing is done until tmfifo_ready
+ * is set by initialization code.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx: drain queued packets when the low-water-mark event fired. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx: pull data in when the high-water-mark event fired. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Each source is registered with dev_id = (u8 *)fifo + irq_index, so
+ * the index is recovered from the pointer's low bits and the fifo base
+ * by subtracting it back out. NOTE(review): this presumably relies on
+ * the tmfifo structure being at least sizeof(void *)-aligned and on
+ * TM_IRQ_CNT never exceeding sizeof(void *) - confirm. Real work is
+ * deferred to tmfifo_work_handler via the pend_events bit.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Device release callback required by the driver core; empty because
+ * the virtio_device is embedded in tmfifo_vdev, whose lifetime is
+ * managed elsewhere.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL if
+ * the driver has consumed every entry published in the avail ring.
+ * The consumer cursor is vring->next_avail, kept outside the vring
+ * itself. BUG_ON fires only on a corrupted avail entry.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a completed descriptor chain to the used ring.
+ *
+ * 'desc' is the head of the chain and 'len' the number of bytes
+ * written into it (0 for Tx).
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/*
+ * Get the total length of a descriptor chain.
+ *
+ * Walks the chain from 'desc', following VRING_DESC_F_NEXT links, and
+ * sums the per-descriptor lengths (converted from virtio byte order).
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release one packet back to the used ring.
+ *
+ * If an in-progress chain is tracked in the vring (desc/desc_head set),
+ * that chain is released with the saved pkt_len; otherwise the next
+ * available chain is fetched and released with its computed length.
+ * On return *desc (if provided) is cleared and pkt_len reset so the
+ * vring is ready for the next packet.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * House-keeping timer: unconditionally raises both the Rx and Tx event
+ * bits, kicks the work handler, and re-arms itself. This acts as a
+ * safety net against lost interrupts and leftover FIFO bytes.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output.
+ *
+ * Drains the console Tx virtqueue into the circular tx_buf: each
+ * packet is prefixed with an 8-byte tmfifo message header (big-endian
+ * length) and padded up to an 8-byte boundary, matching the u64-word
+ * granularity of the FIFO drain in tmfifo_virtio_rxtx(). Every chain
+ * is released back to the ring; a packet that does not fit in the
+ * remaining buffer space is released without being copied (dropped).
+ * Caller holds tmfifo_spin_lock (see tmfifo_virtio_notify).
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy the chain's payload, handling buffer wrap-around. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Moves data between the queue's vring descriptors and the hardware
+ * FIFO, one u64 word at a time. Every packet on the wire starts with
+ * an 8-byte tmfifo_msg_hdr carrying the device type and big-endian
+ * payload length. On Rx the stream may interleave packets destined for
+ * different virtual devices; when the header names another vdev,
+ * processing is handed over to that device's Rx vring. State for a
+ * partially transferred packet (desc, desc_head, cur_len, rem_len,
+ * pkt_len) is saved in the vring so a later call can resume, and
+ * fifo->vring[is_rx] marks which vring currently owns the direction.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/*
+				 * NOTE(review): writeq() already converts to
+				 * little-endian on arm64, so the explicit
+				 * cpu_to_le64() would double-swap on a
+				 * big-endian kernel - confirm intent.
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			/*
+			 * Hand over to the other device's vring; it now owns
+			 * this direction via fifo->vring[is_rx].
+			 */
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Rx rings (even ids) just set the Rx event bit and defer to the work
+ * handler. Tx rings defer too, except for the console, whose output is
+ * copied into the circular buffer immediately because console writes
+ * can be blocking calls made with interrupts disabled.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Return the feature bits advertised by this virtual device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Record the features negotiated by the virtio core. Always succeeds. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Any packet still in flight on a ring is released back to the used
+ * ring before the virtqueue is deleted. Also used by find_vqs() itself
+ * to unwind on partial failure.
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested name on top of the DMA buffers
+ * pre-allocated by tmfifo_alloc_vrings(), wiring each vq back to its
+ * tmfifo_vring via vq->priv. On any failure every queue created so far
+ * is torn down through tmfifo_virtio_del_vqs().
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the virtio status byte of this virtual device. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the virtio status byte of this virtual device. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device: just clear the cached status byte for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->status = 0;
+}
+
+/* Copy @len bytes of the device config space at @offset into @buf. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+
+	/* Bounds check; the second clause rejects integer wrap-around. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field.
+ *
+ * Copies @len bytes from @buf into the device config space at @offset.
+ * Out-of-range (or overflowing) requests are rejected with an error
+ * log, mirroring tmfifo_virtio_get().
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+			      unsigned int offset,
+			      const void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Bounds check; the second clause rejects integer wrap-around. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was "virtio_get" — copy-paste; report the right accessor. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations.
+ *
+ * Installed into each tmfifo_vdev (tm_vdev->vdev.config) so the generic
+ * virtio core can drive this FIFO-backed transport.
+ */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo.
+ *
+ * @fifo:     tmfifo the device is attached to
+ * @vdev_id:  virtio device id (VIRTIO_ID_CONSOLE or VIRTIO_ID_NET)
+ * @features: virtio feature bits advertised to the driver
+ * @config:   optional initial config-space contents (may be NULL)
+ * @size:     length of @config in bytes
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		dev_err(&fifo->pdev->dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		dev_err(&fifo->pdev->dev, "Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+
+	/*
+	 * Publish the vdev before allocating the console Tx buffer so
+	 * tmfifo_free_vrings() can find it on the error paths below.
+	 */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Console output is staged in a Tx buffer; check the allocation. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	kfree(tm_vdev->tx_buf);		/* NULL is a no-op */
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo.
+ *
+ * Unregisters the virtio device (if present) and frees its vrings and
+ * console Tx buffer.  Safe to call for an id that was never created.
+ * Always returns 0.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);	/* NULL for non-console devices */
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function.
+ *
+ * Also used by the probe error path, so it must tolerate a partially
+ * initialized device (NULL drvdata, unmapped register bases, unset
+ * irqs).  Always returns 0.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	/* Stop further Rx/Tx processing first. */
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	/* Release the FIFO memory regions requested during probe. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable.
+ *
+ * Looks up the "RshimMacAddr" EFI variable; when it exists with the
+ * expected 6-byte size, @mac is overwritten with its value, otherwise
+ * @mac is left untouched (the caller pre-loads a default).
+ * NOTE(review): assumes EFI runtime services are usable here — confirm
+ * behavior on non-EFI boots.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx FIFO registers, requests the watermark interrupts,
+ * programs the FIFO watermarks and creates the console and network
+ * virtio devices on top of the shared FIFO.  The 'err' path reuses
+ * tmfifo_remove(), which tolerates partial initialization; 'early_err'
+ * is for failures before any resource that remove() releases is owned.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/*
+		 * Nothing is claimed yet; don't go through tmfifo_remove(),
+		 * which would release regions we never requested.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	/* Init the lock before any error path (tmfifo_delete_vdev) takes it. */
+	mutex_init(&fifo->lock);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs tmfifo_timer as the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		/* platform_get_irq() returns a negative errno on failure. */
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			goto err;
+		fifo->irq[i] = ret;
+
+		/* The irq index is encoded in the low bits of dev_id. */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/* 'ret' is 0 after a successful irq loop; reset it for the maps. */
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev with MAC/MTU/link status in its config. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver; binds via either device tree or ACPI. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: set up the global lock and register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int rc;
+
+	mutex_init(&tmfifo_lock);
+
+	rc = platform_driver_register(&tmfifo_driver);
+	if (rc)
+		pr_err("Failed to register tmfifo driver.\n");
+	return rc;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-05-25 20:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+/* Parenthesize the macro arguments so expansion with compound
+ * expressions keeps the intended operator precedence.
+ */
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	(((reg) & ~(mask)) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* Free space in the circular console Tx buffer.
+ * NOTE(review): 8 bytes are deliberately held back — presumably so the
+ * 8-byte FIFO writes never make the tail catch up with the head
+ * (full vs. empty would be indistinguishable); confirm.
+ */
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+/* Advance the tail (producer) index with wrap-around. */
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* Advance the head (consumer) index with wrap-around. */
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo.
+ *
+ * Gives each ring of @tm_vdev a page-aligned DMA-coherent buffer.  On
+ * failure, any rings already allocated for this device are freed again
+ * so the caller never inherits a partially allocated set.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			goto unwind;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+
+unwind:
+	/* Free the rings allocated by earlier iterations. */
+	while (--i >= 0) {
+		vring = &tm_vdev->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+				  vring->va, vring->dma);
+		vring->va = NULL;
+	}
+	return -ENOMEM;
+}
+
+/* Free vrings of the fifo device.
+ *
+ * Deletes any virtqueue still attached to a ring and releases the
+ * DMA-coherent ring memory.  A NULL 'va' marks a ring that was never
+ * allocated (or was already freed), making the loop idempotent.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device.
+ *
+ * 'irq[i] == 0' marks a slot that was never requested.  The dev_id
+ * cookie '(u8 *)fifo + i' must match the one used in request_irq().
+ */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			/* Clear the slot first so it isn't freed twice. */
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring.
+ *
+ * Runs in process context (scheduled from the irq handler and the
+ * house-keeping timer).  For each pending Tx/Rx event it kicks the
+ * corresponding ring of every registered vdev, under fifo->lock.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler.
+ *
+ * dev_id was registered as '(u8 *)fifo + i', so the irq index is
+ * recovered from the low bits of the pointer and the fifo from the
+ * aligned base (assumes fifo is at least pointer-size aligned, which
+ * kzalloc provides).  Actual processing is deferred to the workqueue.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now.
+ *
+ * The tmfifo_vdev is freed explicitly in tmfifo_delete_vdev(), so the
+ * device release callback is a no-op.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring.
+ *
+ * Walks the avail ring with the driver-private 'next_avail' cursor and
+ * returns the head descriptor of the next chain, or NULL when no new
+ * buffer is available.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Post a completed descriptor chain back to the used ring.
+ *
+ * Records the chain head and the total consumed length, then publishes
+ * the new used index.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain.
+ *
+ * Sums desc->len across the chain, following VRING_DESC_F_NEXT links.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/* Complete (release) one packet back to the virtio driver.
+ *
+ * If a partially processed chain is tracked in the vring (@desc and
+ * vring->desc_head both set), that chain is completed with the saved
+ * packet length; otherwise the next avail chain is pulled and completed
+ * with its full length.  Clears *desc and the saved packet length.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer.
+ *
+ * Periodically forces both the Rx and Tx work to run — covering lost
+ * interrupts and bytes left over in the FIFO — then re-arms itself.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output.
+ *
+ * Drains every available descriptor chain from the console Tx queue
+ * into the circular tx_buf, prefixing each packet with a tmfifo message
+ * header and padding it to an 8-byte boundary.  A packet that does not
+ * fit in the remaining buffer space has its descriptors released
+ * without being copied, and draining stops.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Write the message header at the current tail. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy the chain; a copy may wrap around the buffer end. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-05-25 20:17   ` Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-05-25 20:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 20:17   ` Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..0a362f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,20 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed via USB or PCIe.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-05-25 20:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..0a362f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,20 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed via USB or PCIe.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-05-25 20:17   ` Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2018-05-25 20:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
 L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* RE: [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 17:14   ` Robin Murphy
@ 2018-05-25 20:18     ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:18 UTC (permalink / raw)
  To: Robin Murphy, Olof Johansson, Arnd Bergmann, David Woods
  Cc: devicetree, linux-arm-kernel

Thanks for the comments! Uploaded patch series v2. 
Please also see response inline.

- Liming

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy@arm.com]
> Sent: Friday, May 25, 2018 1:15 PM
> To: Liming Sun <lsun@mellanox.com>; Olof Johansson <olof@lixom.net>;
> Arnd Bergmann <arnd@arndb.de>; David Woods <dwoods@mellanox.com>
> Cc: devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org
> Subject: Re: [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
> 
> On 25/05/18 17:06, Liming Sun wrote:
> [...]
> > diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> > new file mode 100644
> > index 0000000..a3303d1
> > --- /dev/null
> > +++ b/drivers/soc/mellanox/tmfifo.c
> > @@ -0,0 +1,1265 @@
> > +// SPDX-License-Identifier: GPL-2.0
> 
> This tag doesn't match the included license text...

Fixed in patch v2-1/4.

> 
> > +/*
> > + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> > + *
> > + * This software is available to you under a choice of one of two
> > + * licenses.  You may choose to be licensed under the terms of the GNU
> > + * General Public License (GPL) Version 2, available from the file
> > + * COPYING in the main directory of this source tree, or the
> > + * OpenIB.org BSD license below:
> > + *
> > + *     Redistribution and use in source and binary forms, with or
> > + *     without modification, are permitted provided that the following
> > + *     conditions are met:
> > + *
> > + *      - Redistributions of source code must retain the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer.
> > + *
> > + *      - Redistributions in binary form must reproduce the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer in the documentation and/or other materials
> > + *        provided with the distribution.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
> WARRANTIES OF
> > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
> COPYRIGHT HOLDERS
> > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
> AN
> > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
> OR IN
> > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
> > + * SOFTWARE.
> > + */
> 
> [...]
> > +/* Several utility macros to get/set the register fields. */
> > +#define TMFIFO_GET_FIELD(reg, field) \
> > +	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
> > +
> > +#define TMFIFO_SET_FIELD(reg, field, value) ({ \
> > +	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
> > +	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
> > +})
> 
> There's no need to reinvent <linux/bitfield.h>

Updated in patch v2-1/4.

> 
> [...]
> > +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> > +MODULE_AUTHOR("Mellanox Technologies, Ltd");
> > +MODULE_LICENSE("GPL");
> 
> ...and this implies yet another different license (since it indicates
> "GPLv2 or later")

Fixed in patch v2-1/4.

> 
> > +MODULE_VERSION("0.7");
> > diff --git a/drivers/soc/mellanox/tmfifo_regs.h
> b/drivers/soc/mellanox/tmfifo_regs.h
> > new file mode 100644
> > index 0000000..b07f353
> > --- /dev/null
> > +++ b/drivers/soc/mellanox/tmfifo_regs.h
> > @@ -0,0 +1,112 @@
> > +// SPDX-License-Identifier: GPL-2.0
> 
> Again, this doesn't match the included text. Also, the SPDX comment
> style is /* */ for headers.

Updated in patch v2-1/4.

> 
> > +/*
> > + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> > + *
> > + * This software is available to you under a choice of one of two
> > + * licenses.  You may choose to be licensed under the terms of the GNU
> > + * General Public License (GPL) Version 2, available from the file
> > + * COPYING in the main directory of this source tree, or the
> > + * OpenIB.org BSD license below:
> > + *
> > + *     Redistribution and use in source and binary forms, with or
> > + *     without modification, are permitted provided that the following
> > + *     conditions are met:
> > + *
> > + *      - Redistributions of source code must retain the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer.
> > + *
> > + *      - Redistributions in binary form must reproduce the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer in the documentation and/or other materials
> > + *        provided with the distribution.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
> WARRANTIES OF
> > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
> COPYRIGHT HOLDERS
> > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
> AN
> > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
> OR IN
> > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
> > + * SOFTWARE.
> > + */
> 
> [...]
> > +#ifdef __ASSEMBLER__
> > +#define _64bit(x) x
> > +#else /* __ASSEMBLER__ */
> > +#ifdef __tile__
> > +#define _64bit(x) x ## UL
> > +#else /* __tile__ */
> > +#define _64bit(x) x ## ULL
> > +#endif /* __tile__ */
> > +#endif /* __ASSEMBLER */
> > +
> > +#ifdef __KERNEL__
> > +#include <linux/types.h>
> > +#else
> > +#include <stdint.h>
> > +#endif
> > +
> > +#ifndef __DOXYGEN__
> 
> Given that this is a private header under drivers/, is any of that lot
> necessary?

Simplified and updated it in patch v2-1/4.

> 
> Robin.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-05-25 20:18     ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-05-25 20:18 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks for the comments! Uploaded patch series v2. 
Please also see response inline.

- Liming

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy at arm.com]
> Sent: Friday, May 25, 2018 1:15 PM
> To: Liming Sun <lsun@mellanox.com>; Olof Johansson <olof@lixom.net>;
> Arnd Bergmann <arnd@arndb.de>; David Woods <dwoods@mellanox.com>
> Cc: devicetree at vger.kernel.org; linux-arm-kernel at lists.infradead.org
> Subject: Re: [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
> 
> On 25/05/18 17:06, Liming Sun wrote:
> [...]
> > diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> > new file mode 100644
> > index 0000000..a3303d1
> > --- /dev/null
> > +++ b/drivers/soc/mellanox/tmfifo.c
> > @@ -0,0 +1,1265 @@
> > +// SPDX-License-Identifier: GPL-2.0
> 
> This tag doesn't match the included license text...

Fixed in patch v2-1/4.

> 
> > +/*
> > + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> > + *
> > + * This software is available to you under a choice of one of two
> > + * licenses.  You may choose to be licensed under the terms of the GNU
> > + * General Public License (GPL) Version 2, available from the file
> > + * COPYING in the main directory of this source tree, or the
> > + * OpenIB.org BSD license below:
> > + *
> > + *     Redistribution and use in source and binary forms, with or
> > + *     without modification, are permitted provided that the following
> > + *     conditions are met:
> > + *
> > + *      - Redistributions of source code must retain the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer.
> > + *
> > + *      - Redistributions in binary form must reproduce the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer in the documentation and/or other materials
> > + *        provided with the distribution.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
> WARRANTIES OF
> > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
> COPYRIGHT HOLDERS
> > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
> AN
> > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
> OR IN
> > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
> > + * SOFTWARE.
> > + */
> 
> [...]
> > +/* Several utility macros to get/set the register fields. */
> > +#define TMFIFO_GET_FIELD(reg, field) \
> > +	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
> > +
> > +#define TMFIFO_SET_FIELD(reg, field, value) ({ \
> > +	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
> > +	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
> > +})
> 
> There's no need to reinvent <linux/bitfield.h>

Updated in patch v2-1/4.

> 
> [...]
> > +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> > +MODULE_AUTHOR("Mellanox Technologies, Ltd");
> > +MODULE_LICENSE("GPL");
> 
> ...and this implies yet another different license (since it indicates
> "GPLv2 or later")

Fixed in patch v2-1/4.

> 
> > +MODULE_VERSION("0.7");
> > diff --git a/drivers/soc/mellanox/tmfifo_regs.h
> b/drivers/soc/mellanox/tmfifo_regs.h
> > new file mode 100644
> > index 0000000..b07f353
> > --- /dev/null
> > +++ b/drivers/soc/mellanox/tmfifo_regs.h
> > @@ -0,0 +1,112 @@
> > +// SPDX-License-Identifier: GPL-2.0
> 
> Again, this doesn't match the included text. Also, the SPDX comment
> style is /* */ for headers.

Updated in patch v2-1/4.

> 
> > +/*
> > + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> > + *
> > + * This software is available to you under a choice of one of two
> > + * licenses.  You may choose to be licensed under the terms of the GNU
> > + * General Public License (GPL) Version 2, available from the file
> > + * COPYING in the main directory of this source tree, or the
> > + * OpenIB.org BSD license below:
> > + *
> > + *     Redistribution and use in source and binary forms, with or
> > + *     without modification, are permitted provided that the following
> > + *     conditions are met:
> > + *
> > + *      - Redistributions of source code must retain the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer.
> > + *
> > + *      - Redistributions in binary form must reproduce the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer in the documentation and/or other materials
> > + *        provided with the distribution.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
> WARRANTIES OF
> > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
> COPYRIGHT HOLDERS
> > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
> AN
> > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
> OR IN
> > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
> > + * SOFTWARE.
> > + */
> 
> [...]
> > +#ifdef __ASSEMBLER__
> > +#define _64bit(x) x
> > +#else /* __ASSEMBLER__ */
> > +#ifdef __tile__
> > +#define _64bit(x) x ## UL
> > +#else /* __tile__ */
> > +#define _64bit(x) x ## ULL
> > +#endif /* __tile__ */
> > +#endif /* __ASSEMBLER */
> > +
> > +#ifdef __KERNEL__
> > +#include <linux/types.h>
> > +#else
> > +#include <stdint.h>
> > +#endif
> > +
> > +#ifndef __DOXYGEN__
> 
> Given that this is a private header under drivers/, is any of that lot
> necessary?

Simplified and updated it in patch v2-1/4.

> 
> Robin.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 20:17   ` Liming Sun
@ 2018-05-31  3:43     ` Rob Herring
  -1 siblings, 0 replies; 179+ messages in thread
From: Rob Herring @ 2018-05-31  3:43 UTC (permalink / raw)
  To: Liming Sun
  Cc: devicetree, David Woods, Arnd Bergmann, Olof Johansson,
	Robin Murphy, linux-arm-kernel

On Fri, May 25, 2018 at 04:17:15PM -0400, Liming Sun wrote:

Commit msg?

> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> 
> diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> new file mode 100644
> index 0000000..0a362f5
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> @@ -0,0 +1,20 @@
> +* Mellanox BlueField SoC TmFifo
> +
> +BlueField TmFifo provides a shared FIFO between the target and the
> +external host machine, which can be accessed via USB or PCIe.

A FIFO for what? I'd like to find a better spot than bindings/soc/
> +
> +Required properties:
> +
> +- compatible:	Should be "mellanox,bf-tmfifo"
> +- reg:		Physical base address and length of Rx/Tx block
> +- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
> +		Tx low water mark, Tx high water mark respectively.
> +
> +Example:
> +
> +tmfifo@800a20 {
> +	compatible = "mellanox,bf-tmfifo";
> +	reg = <0x00800a20 0x00000018
> +	       0x00800a40 0x00000018>;
> +	interrupts = <41, 42, 43, 44>;
> +};
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe devicetree" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-05-31  3:43     ` Rob Herring
  0 siblings, 0 replies; 179+ messages in thread
From: Rob Herring @ 2018-05-31  3:43 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, May 25, 2018 at 04:17:15PM -0400, Liming Sun wrote:

Commit msg?

> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> 
> diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> new file mode 100644
> index 0000000..0a362f5
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> @@ -0,0 +1,20 @@
> +* Mellanox BlueField SoC TmFifo
> +
> +BlueField TmFifo provides a shared FIFO between the target and the
> +external host machine, which can be accessed via USB or PCIe.

A FIFO for what? I'd like to find a better spot than bindings/soc/
> +
> +Required properties:
> +
> +- compatible:	Should be "mellanox,bf-tmfifo"
> +- reg:		Physical base address and length of Rx/Tx block
> +- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
> +		Tx low water mark, Tx high water mark respectively.
> +
> +Example:
> +
> +tmfifo at 800a20 {
> +	compatible = "mellanox,bf-tmfifo";
> +	reg = <0x00800a20 0x00000018
> +	       0x00800a40 0x00000018>;
> +	interrupts = <41, 42, 43, 44>;
> +};
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe devicetree" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-31  3:43     ` Rob Herring
@ 2018-06-01 14:31       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: Rob Herring
  Cc: devicetree, David Woods, Arnd Bergmann, Olof Johansson,
	Robin Murphy, linux-arm-kernel

Thanks for the comments. Please see response inline.

> -----Original Message-----
> From: Rob Herring [mailto:robh@kernel.org]
> Sent: Wednesday, May 30, 2018 11:44 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; Arnd Bergmann <arnd@arndb.de>;
> David Woods <dwoods@mellanox.com>; Robin Murphy
> <robin.murphy@arm.com>; devicetree@vger.kernel.org; linux-arm-
> kernel@lists.infradead.org
> Subject: Re: [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for
> Mellanox BlueField SoC
> 
> On Fri, May 25, 2018 at 04:17:15PM -0400, Liming Sun wrote:
> 
> Commit msg?

Updated in patch v3 3/4.

> 
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20
> ++++++++++++++++++++
> >  1 file changed, 20 insertions(+)
> >  create mode 100644
> Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> >
> > diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > new file mode 100644
> > index 0000000..0a362f5
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > @@ -0,0 +1,20 @@
> > +* Mellanox BlueField SoC TmFifo
> > +
> > +BlueField TmFifo provides a shared FIFO between the target and the
> > +external host machine, which can be accessed via USB or PCIe.
> 
> A FIFO for what? I'd like to find a better spot than bindings/soc/

This is a generic hardware FIFO which can be accessed by the SoC software and the external host machine to exchange arbitrary data. In the current Linux tmfifo driver, this FIFO is used (demuxed) to implement a virtual console and a network interface based on the virtio framework.

Updated the tmfifo.txt in patch v3 3/4 with the above explanation. Please advise if there is a better place for this file.
 
> > +
> > +Required properties:
> > +
> > +- compatible:	Should be "mellanox,bf-tmfifo"
> > +- reg:		Physical base address and length of Rx/Tx block
> > +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> mark
> > +		Tx low water mark, Tx high water mark respectively.
> > +
> > +Example:
> > +
> > +tmfifo@800a20 {
> > +	compatible = "mellanox,bf-tmfifo";
> > +	reg = <0x00800a20 0x00000018
> > +	       0x00800a40 0x00000018>;
> > +	interrupts = <41, 42, 43, 44>;
> > +};
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe devicetree" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-06-01 14:31       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks for the comments. Please see response inline.

> -----Original Message-----
> From: Rob Herring [mailto:robh at kernel.org]
> Sent: Wednesday, May 30, 2018 11:44 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; Arnd Bergmann <arnd@arndb.de>;
> David Woods <dwoods@mellanox.com>; Robin Murphy
> <robin.murphy@arm.com>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for
> Mellanox BlueField SoC
> 
> On Fri, May 25, 2018 at 04:17:15PM -0400, Liming Sun wrote:
> 
> Commit msg?

Updated in patch v3 3/4.

> 
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20
> ++++++++++++++++++++
> >  1 file changed, 20 insertions(+)
> >  create mode 100644
> Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> >
> > diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > new file mode 100644
> > index 0000000..0a362f5
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > @@ -0,0 +1,20 @@
> > +* Mellanox BlueField SoC TmFifo
> > +
> > +BlueField TmFifo provides a shared FIFO between the target and the
> > +external host machine, which can be accessed via USB or PCIe.
> 
> A FIFO for what? I'd like to find a better spot than bindings/soc/

This is a generic hardware FIFO which can be accessed by the SoC software and the external host machine to exchange arbitrary data. In the current Linux tmfifo driver, this FIFO is used (demuxed) to implement a virtual console and a network interface based on the virtio framework.

Updated the tmfifo.txt in patch v3 3/4 with the above explanation. Please advise if there is a better place for this file.
 
> > +
> > +Required properties:
> > +
> > +- compatible:	Should be "mellanox,bf-tmfifo"
> > +- reg:		Physical base address and length of Rx/Tx block
> > +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> mark
> > +		Tx low water mark, Tx high water mark respectively.
> > +
> > +Example:
> > +
> > +tmfifo at 800a20 {
> > +	compatible = "mellanox,bf-tmfifo";
> > +	reg = <0x00800a20 0x00000018
> > +	       0x00800a40 0x00000018>;
> > +	interrupts = <41, 42, 43, 44>;
> > +};
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe devicetree" in
> > the body of a message to majordomo at vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case an interrupt
+	 * is missed or any leftover bytes are stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up the Tx handler in case virtio has queued too many packets
+	 * and is waiting for buffers to be returned.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in the FIFO for console output. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}	/* FIXME: tx_buf allocation failure is not checked here. */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:	/* FIXME: tx_buf leaks here for the console vdev. */
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the MAC address from the "RshimMacAddr" EFI variable, if present. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));	/* else keep the caller's default */
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	/* timer_setup() has already installed the callback. */
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/* request_irq() left ret == 0; restore -ENOMEM for ioremap failures. */
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-06-01 14:31   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Extract the field selected by 'mask' from register value 'reg'. */
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+/* Return 'reg' with the field selected by 'mask' replaced by 'value'. */
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/*
+ * Bytes available in the console Tx ring buffer.  8 bytes are always
+ * kept unused so that 'tx_tail == tx_head' can only mean "empty",
+ * never "full".
+ */
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+/* Advance the producer (tail) index, wrapping at the buffer size. */
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* Advance the consumer (head) index, wrapping at the buffer size. */
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/*
+ * Allocate DMA-coherent memory for the Rx/Tx vrings of one virtual device.
+ *
+ * On failure, the vrings already allocated by this call are freed before
+ * returning: the caller's error path only kfree()s the vdev and would
+ * otherwise leak the coherent buffers.
+ *
+ * Return: 0 on success, -ENOMEM if a coherent allocation fails.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, j, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			/* Unwind the vrings allocated so far. */
+			for (j = 0; j < i; j++) {
+				vring = &tm_vdev->vrings[j];
+				size = PAGE_ALIGN(vring_size(vring->size,
+							     vring->align));
+				dma_free_coherent(tm_vdev->vdev.dev.parent,
+						  size, vring->va, vring->dma);
+				vring->va = NULL;
+			}
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free the vrings of one virtual device: delete the virtqueue (if one
+ * was created by find_vqs) and release the DMA-coherent ring memory.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		/* vq can only exist if the ring memory was allocated. */
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free the interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			/* Clear first so the handler won't be re-armed. */
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			/* dev_id must match the cookie given to request_irq(). */
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ *
+ * Runs the deferred Rx/Tx processing for every virtual device whose
+ * pending-event bit was set by the interrupt handler or the timer.
+ * Serialized against vdev creation/removal by fifo->lock.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	/* Nothing to do until probe has completed. */
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * Interrupt handler.
+ *
+ * request_irq() registered 'dev_id' as ((u8 *)fifo + irq-index), so the
+ * index is recovered from the pointer's low bits.  This relies on
+ * 'struct tmfifo' being at least pointer-aligned and on TM_IRQ_CNT not
+ * exceeding sizeof(void *).
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Defer the actual FIFO processing to the workqueue. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Release callback required by the device core.  All resources are
+ * freed in tmfifo_delete_vdev(), so nothing to do here.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the head descriptor of the next available chain from the vring,
+ * or NULL if the driver side has posted nothing new.  'next_avail'
+ * tracks how far this side has consumed the avail ring.
+ *
+ * NOTE(review): avail->idx and avail->ring[] are read without
+ * virtio16_to_cpu(); fine on little-endian - confirm for BE builds.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Post a completed descriptor chain to the used ring.
+ *
+ * The 'id' and 'len' fields and the used index itself are all kept in
+ * virtio byte order so the other side reads consistent values.  The
+ * original code stored 'id' and incremented 'idx' in CPU byte order
+ * while converting only 'len', which breaks on big-endian.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	u16 used_idx;
+	unsigned int idx;
+
+	used_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = used_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, used_idx + 1);
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	/* Walk the chain via VRING_DESC_F_NEXT, summing buffer lengths. */
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Complete and release the current packet of a vring.
+ *
+ * If '*desc' (together with the saved desc_head) identifies a packet
+ * already being assembled, it is released with the recorded pkt_len;
+ * otherwise the next available chain is fetched and released untouched.
+ * '*desc' is cleared and the packet length reset in either case.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		/* A partially processed packet: use the saved state. */
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		/* Otherwise drain the next posted chain, if any. */
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Runs in timer context, so it only sets the pending-event bits and
+ * kicks the workqueue; all FIFO access happens in the work handler.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	/* Re-arm for the next poll interval. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer console output into the per-device Tx ring buffer.
+ *
+ * Called under tmfifo_spin_lock from the notify path.  Each packet is
+ * prefixed with an 8-byte tmfifo_msg_hdr and padded to an 8-byte
+ * boundary.  If the ring buffer cannot hold a packet, its descriptor
+ * chain is released without copying - console output is dropped rather
+ * than blocked.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Write the message header at the current tail. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy the chain's buffers, wrapping at the buffer end. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Called from the work handler with fifo->lock held.  'is_rx' selects
+ * the direction: drain the Rx FIFO into vring buffers, or push vring
+ * buffers into the Tx FIFO, one 64-bit word at a time.  State for a
+ * partially transferred packet (desc, cur_len, rem_len) is kept in the
+ * tmfifo_vring so processing can resume on the next invocation.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	/* Resume from the descriptor left unfinished last time, if any. */
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/*
+				 * NOTE(review): writeq() already converts to
+				 * little-endian; the explicit cpu_to_le64()
+				 * double-swaps on big-endian - confirm intent.
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			/* Claim the FIFO direction until the packet is done. */
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called by the virtio core when new buffers
+ * are posted to a virtqueue.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			/* Copy into the console Tx buffer under the lock. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Report the feature bits this device offers to the virtio core. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->features;
+}
+
+/* Record the feature set the virtio core has settled on. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().  Any packet still being
+ * assembled is released first; the ring DMA memory itself is freed
+ * separately by tmfifo_free_vrings().
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues on top of the DMA memory
+ * that tmfifo_alloc_vrings() already allocated.  On any failure, all
+ * queues created so far are torn down again.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link the virtqueue and our ring state both ways. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the cached virtio status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->status;
+}
+
+/* Cache the virtio status byte written by the core. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = status;
+}
+
+/* Reset the device by clearing the cached status byte. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/*
+	 * Bounds check; the second clause catches unsigned wrap-around
+	 * of 'offset + len'.
+	 */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/*
+	 * Bounds check; the second clause catches unsigned wrap-around
+	 * of 'offset + len'.
+	 */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Fixed copy-paste error: message previously said virtio_get. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations (dispatch table used by the virtio core). */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create one virtual device (console or net) on the tmfifo.
+ *
+ * 'config' (if non-NULL) seeds 'size' bytes of the device's virtio
+ * config space.  The console tx_buf allocation is now checked - it was
+ * previously ignored, leaving the device registered without a buffer.
+ *
+ * Return: 0 on success, -EEXIST/-ENOMEM or register_virtio_device()
+ * error on failure.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		dev_err(&fifo->pdev->dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		dev_err(&fifo->pdev->dev, "Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+
+	/* Publish before tx_buf alloc so the error path can free vrings. */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	kfree(tm_vdev->tx_buf);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete one virtual device from the tmfifo: unregister it from the
+ * virtio core, free its vrings and console Tx buffer.  Safe to call
+ * for ids that were never created (the slot is simply NULL).
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		/* tx_buf is NULL for non-console devices; kfree handles it. */
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function (also used as the error-unwind path of
+ * probe).  Ordering: stop the timer and irqs first so no new work is
+ * queued, then flush the work, then tear down the virtual devices and
+ * unmap/release the MMIO resources.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable.  'mac' is left untouched if EFI runtime services are not
+ * available or the variable is missing or malformed.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/*
+	 * efi.get_variable is only populated when runtime services are
+	 * enabled; calling it unconditionally crashes on non-EFI boots.
+	 */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx register windows, programs the FIFO watermarks,
+ * requests the four interrupts, and creates the console and network
+ * virtio devices.  Fixes over the original: platform_get_irq() errors
+ * are now checked before request_irq(); 'ret' is set to -ENOMEM before
+ * the ioremap() calls (it was left 0, so an ioremap failure returned
+ * "success"); the redundant re-assignment of the timer callback after
+ * timer_setup() is gone; and a missing-resource failure no longer
+ * detours through tmfifo_remove().
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() installs tmfifo_timer as the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			goto err;
+		fifo->irq[i] = ret;
+		/* The irq index is encoded in the dev_id cookie's low bits. */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	/* tmfifo_remove() unwinds everything set up so far. */
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver glue. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-06-01 14:31   ` Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-06-01 14:31   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-06-01 14:31   ` Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-06-01 14:31   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-06-01 14:31   ` Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2018-06-01 14:31   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-06-01 14:31   ` Liming Sun
@ 2018-06-11 18:19     ` Rob Herring
  -1 siblings, 0 replies; 179+ messages in thread
From: Rob Herring @ 2018-06-11 18:19 UTC (permalink / raw)
  To: Liming Sun
  Cc: devicetree, David Woods, Arnd Bergmann, Olof Johansson,
	Robin Murphy, linux-arm-kernel

On Fri, Jun 01, 2018 at 10:31:06AM -0400, Liming Sun wrote:
> Add devicetree bindings for the TmFifo which is found on Mellanox
> BlueField SoCs.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

Reviewed-by: Rob Herring <robh@kernel.org>

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-06-11 18:19     ` Rob Herring
  0 siblings, 0 replies; 179+ messages in thread
From: Rob Herring @ 2018-06-11 18:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jun 01, 2018 at 10:31:06AM -0400, Liming Sun wrote:
> Add devicetree bindings for the TmFifo which is found on Mellanox
> BlueField SoCs.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

Reviewed-by: Rob Herring <robh@kernel.org>

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver supporting both DT and ACPI enumeration. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int rc;
+
+	mutex_init(&tmfifo_lock);
+	rc = platform_driver_register(&tmfifo_driver);
+	if (rc != 0)
+		pr_err("Failed to register tmfifo driver.\n");
+	return rc;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-24 17:55   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * Interrupt handler.
+ *
+ * dev_id was registered as (u8 *)fifo + i (see tmfifo_probe()), so the
+ * remainder modulo sizeof(void *) recovers the irq index and subtracting
+ * it recovers the fifo pointer.  NOTE(review): this encoding relies on
+ * TM_IRQ_CNT <= sizeof(void *) and on the kzalloc'ed fifo being
+ * pointer-aligned — confirm, or pass a small per-irq struct instead.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Device release callback required by the driver core.  The tmfifo_vdev
+ * memory is freed explicitly in tmfifo_delete_vdev(), so there is
+ * nothing to do here.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL if
+ * nothing is pending.  vring->next_avail is this driver's private
+ * shadow of how far the avail ring has been consumed.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	/* NOTE(review): avail->idx and the ring entry are read without
+	 * virtio16 conversion; fine on little-endian hosts — confirm
+	 * whether big-endian needs to be supported.
+	 */
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Add a completed descriptor chain to the used ring.
+ *
+ * All used-ring fields are stored in virtio byte order.  The previous
+ * version converted only 'len' and accessed 'id' and 'idx' raw, which
+ * happened to work on little-endian hosts but was inconsistent; all
+ * three accesses now use the virtio16/virtio32 helpers.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Sum the buffer lengths of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 total = 0;
+
+	/* Walk the chain until a descriptor without the NEXT flag. */
+	for (; desc; desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)]) {
+		total += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+	}
+
+	return total;
+}
+
+/*
+ * Return a packet's descriptor chain to the used ring.
+ *
+ * If @desc points at an in-flight packet (with its head saved in
+ * vring->desc_head), that packet is completed using the saved pkt_len;
+ * otherwise the next available chain is popped and released with its
+ * computed length.  Clears *desc and vring->pkt_len.
+ * NOTE(review): vring->desc_head itself is not cleared here — confirm
+ * callers always overwrite it before the next use.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* Periodic house-keeping timer: kick the worker even without interrupts. */
+static void tmfifo_timer(struct timer_list *t)
+{
+	struct tmfifo *fifo = container_of(t, struct tmfifo, timer);
+
+	/*
+	 * Request both directions to be polled: Rx in case an interrupt
+	 * was missed or leftover bytes are stuck in the FIFO, and Tx in
+	 * case virtio has queued too many packets and is waiting for
+	 * buffers to be returned.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+	schedule_work(&fifo->work);
+
+	/* Re-arm for the next polling period. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output.
+ *
+ * Copies each pending Tx packet into the console's circular tx_buf,
+ * prefixed with a tmfifo_msg_hdr and padded to 8 bytes, then releases
+ * the descriptors.  Called from tmfifo_virtio_notify() while holding
+ * tmfifo_spin_lock, so console writes also work with IRQs disabled;
+ * the buffered bytes are drained to the FIFO by tmfifo_virtio_rxtx().
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Write the message header at the current tail. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy the descriptor chain into the circular buffer. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				/* Copy wraps past the buffer end: split it. */
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Moves packet data between the vring buffers and the shared hardware
+ * FIFO, one 64-bit word at a time.  Per-vring state (desc, pkt_len,
+ * cur_len, rem_len) lets a partially transferred packet be resumed by
+ * a later call, and fifo->vring[is_rx] records which vring currently
+ * owns that FIFO direction.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	/* Resume from the descriptor saved by a previous partial transfer. */
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/*
+				 * NOTE(review): writeq() already converts to
+				 * little-endian; the extra cpu_to_le64() is a
+				 * no-op on LE hosts but would double-swap on
+				 * big-endian — confirm (same pattern below).
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc so the transfer resumes here next time. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * virtio config op: kick processing of the given queue, either by
+ * scheduling the worker or — for the console — by draining the queue
+ * inline into the Tx staging buffer.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* virtio config op: report the device's supported feature bits. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->features;
+}
+
+/* virtio config op: record the feature bits negotiated by the driver. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm = container_of(vdev, struct tmfifo_vdev, vdev);
+
+	tm->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * virtio config op: any in-flight packet is returned to the used ring
+ * first, then each virtqueue is deleted.  The coherent ring memory is
+ * owned by tmfifo_free_vrings(), not freed here.
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * virtio config op: builds one virtqueue per requested ring on top of
+ * the coherent memory allocated by tmfifo_alloc_vrings(), wiring in
+ * tmfifo_virtio_notify() as the kick callback.  On error all queues
+ * created so far are torn down via tmfifo_virtio_del_vqs().
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		/* weak_barriers=false, context=false for this transport. */
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* vq->priv links back to the tmfifo ring for the callbacks. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* virtio config op: read back the device status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->status;
+}
+
+/* virtio config op: store the device status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = status;
+}
+
+/* virtio config op: reset the device by clearing its status byte. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ *
+ * virtio config op.  The second clause of the bounds check
+ * (offset + len < len) rejects requests where offset + len wraps the
+ * unsigned range, so a huge offset cannot pass the first clause.
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ *
+ * virtio config op.  The (offset + len < len) clause rejects requests
+ * whose range wraps the unsigned integer space.
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was a copy-paste of the virtio_get message. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations implemented by the tmfifo transport. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * Allocates the per-device state and vrings, allocates the console Tx
+ * staging buffer for VIRTIO_ID_CONSOLE (previously unchecked — a failed
+ * kmalloc left tx_buf NULL and tmfifo_console_output() dereferences it),
+ * then registers the virtio device.  Returns 0, -EEXIST if the id is in
+ * use, -ENOMEM, or the register_virtio_device() error code.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+
+	/*
+	 * Publish before the error paths below so tmfifo_free_vrings(),
+	 * which looks the device up by id, keeps working.
+	 */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* The console path requires the staging buffer; fail without it. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+	kfree(tm_vdev->tx_buf);	/* NULL-safe */
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo.
+ *
+ * Unregisters the virtio device, then frees its vrings, the console Tx
+ * buffer (NULL for non-console devices) and the device itself.  Safe to
+ * call for an id that was never created.  Always returns 0.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function.
+ *
+ * Also used as the unwind path of tmfifo_probe(), so each step checks
+ * whether its resource exists before releasing it.  Teardown order:
+ * mark not ready, stop the timer, free IRQs, cancel the worker, delete
+ * the vdevs, unmap the FIFO registers, then release the mem regions.
+ * NOTE(review): the mem regions are released unconditionally — this
+ * assumes probe requested them before any failure that reaches here;
+ * confirm for the early probe error paths.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable.  Leaves @mac untouched when the variable is absent or has
+ * an unexpected size.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	u8 buf[6];
+	unsigned long len = sizeof(buf);
+	efi_status_t rc;
+
+	rc = efi.get_variable(name, &guid, NULL, &len, buf);
+	if (rc != EFI_SUCCESS || len != sizeof(buf))
+		return;
+
+	memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Fixes over the previous version:
+ *  - a missing-resource failure now goes to early_err instead of err,
+ *    so tmfifo_remove() no longer releases mem regions that were never
+ *    requested;
+ *  - platform_get_irq() errors are checked before request_irq();
+ *  - ret is reset to -ENOMEM before the ioremap() checks (it previously
+ *    still held 0 from the last successful request_irq(), so an ioremap
+ *    failure made probe "succeed");
+ *  - the redundant re-assignment of fifo->timer.function after
+ *    timer_setup() is dropped.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs tmfifo_timer as the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0) {
+			pr_err("Unable to get irq\n");
+			goto err;
+		}
+		fifo->irq[i] = ret;
+		/* dev_id encodes the irq index in the pointer's low bits. */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	ret = -ENOMEM;	/* covers the ioremap() failures below */
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match: see the "mellanox,bf-tmfifo" binding document. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match for firmware that enumerates the device via ACPI. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver, bound via either the DT or the ACPI match table. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module entry point: set up the global lock and register the driver. */
+static int __init tmfifo_init(void)
+{
+	int rc;
+
+	mutex_init(&tmfifo_lock);
+
+	rc = platform_driver_register(&tmfifo_driver);
+	if (rc)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return rc;
+}
+
+/* Module exit point: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+/* Tx FIFO data word register offset. */
+#define TMFIFO_TX_DATA 0x0
+
+/* Tx FIFO status; COUNT is read as the number of words in the FIFO. */
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+/* Tx FIFO control: low/high watermarks and total FIFO size. */
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+/* Rx FIFO data word register offset (in the Rx register block). */
+#define TMFIFO_RX_DATA 0x0
+
+/* Rx FIFO status; COUNT is read as the number of words in the FIFO. */
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+/* Rx FIFO control: low/high watermarks and total FIFO size. */
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-24 17:55   ` Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac3..aeb67c2 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f9a186f..508cb9d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-24 17:55   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac3..aeb67c2 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f9a186f..508cb9d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-24 17:55   ` Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-10-24 17:55   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-10-24 17:55   ` Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c78feb0..07f7c7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2018-10-24 17:55   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c78feb0..07f7c7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-24 17:55   ` Liming Sun
@ 2018-10-25 15:32     ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:32 UTC (permalink / raw)
  To: Liming Sun
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> Add devicetree bindings for the TmFifo which is found on Mellanox
> BlueField SoCs.
>
> Reviewed-by: Rob Herring <robh@kernel.org>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23
> ++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>  create mode 100644
> Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
>
> diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> new file mode 100644
> index 0000000..8a13fa6
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> @@ -0,0 +1,23 @@
> +* Mellanox BlueField SoC TmFifo
> +
> +BlueField TmFifo provides a shared FIFO between the target and the
> +external host machine, which can be accessed by external host via
> +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> +to implement virtual console and network interface based on the virtio
> +framework.
> +
> +Required properties:
> +
> +- compatible:	Should be "mellanox,bf-tmfifo"
> +- reg:		Physical base address and length of Rx/Tx block
> +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> mark
> +		Tx low water mark, Tx high water mark respectively.


This sounds like it might fit into the mailbox subsystem, and perhaps
it should use the mailbox DT bindings. Have you had a look at that?

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-10-25 15:32     ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:32 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> Add devicetree bindings for the TmFifo which is found on Mellanox
> BlueField SoCs.
>
> Reviewed-by: Rob Herring <robh@kernel.org>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23
> ++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>  create mode 100644
> Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
>
> diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> new file mode 100644
> index 0000000..8a13fa6
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> @@ -0,0 +1,23 @@
> +* Mellanox BlueField SoC TmFifo
> +
> +BlueField TmFifo provides a shared FIFO between the target and the
> +external host machine, which can be accessed by external host via
> +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> +to implement virtual console and network interface based on the virtio
> +framework.
> +
> +Required properties:
> +
> +- compatible:	Should be "mellanox,bf-tmfifo"
> +- reg:		Physical base address and length of Rx/Tx block
> +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> mark
> +		Tx low water mark, Tx high water mark respectively.


This sounds like it might fit into the mailbox subsystem, and perhaps
it should use the mailbox DT bindings. Have you had a look at that?

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-24 17:55   ` Liming Sun
@ 2018-10-25 15:38     ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:38 UTC (permalink / raw)
  To: Liming Sun
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit introduces config option for Mellanox BlueField SoC,
> which can be used to build the SoC specific drivers, and enables
> it by default in configs/defconfig.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  arch/arm64/Kconfig.platforms | 6 ++++++
>  arch/arm64/configs/defconfig | 1 +
>  2 files changed, 7 insertions(+)

Reviewed-by: Arnd Bergmann <arnd@arndb.de>

I'm sorry for missing your series in the past. We should definitely merge
the platform support soon. Do you also have device tree files for reference
systems or even production hardware?

I need to have a separate look at the fifo driver.

Unfortunately, you have sent these patches during the merge window,
which is the time during which we don't pick up new work. Let's plan
to pick these up after 4.20-rc1, and please resend to arm@kernel.org
if we manage to forget about it again.

      Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-25 15:38     ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit introduces config option for Mellanox BlueField SoC,
> which can be used to build the SoC specific drivers, and enables
> it by default in configs/defconfig.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  arch/arm64/Kconfig.platforms | 6 ++++++
>  arch/arm64/configs/defconfig | 1 +
>  2 files changed, 7 insertions(+)

Reviewed-by: Arnd Bergmann <arnd@arndb.de>

I'm sorry for missing your series in the past. We should definitely merge
the platform support soon. Do you also have device tree files for reference
systems or even production hardware?

I need to have a separate look at the fifo driver.

Unfortunately, you have sent these patches during the merge window,
which is the time during which we don't pick up new work. Let's plan
to pick these up after 4.20-rc1, and please resend to arm@kernel.org
if we manage to forget about it again.

      Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-24 17:55   ` Liming Sun
@ 2018-10-25 15:57     ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:57 UTC (permalink / raw)
  To: Liming Sun
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

I definitely like the idea of using virtio-net and virtio-console here,
this is a great way of reusing the existing high-level drivers,
and i similar in concept (but also much simpler) to what we
have in drivers/misc/mic/ for another Linux-running machine that
can be a PCIe add-on card.

Have you also posted the other half of this driver? I'd like to see
how it all fits together.

A few style comments:

> +
> +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> +
> +#define TMFIFO_SET_FIELD(reg, mask, value) \
> +	((reg & ~mask) | FIELD_PREP(mask, value))

I think it would be nicer to use FIELD_GET/FIELD_PREP
in the code directly, and avoid adding extra wrappers around them.

> +/* Vring size. */
> +#define TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* Use a timer for house-keeping. */
> +static int tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static struct mutex tmfifo_lock;

Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the
initialization call.

> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> +module_param(tmfifo_vring_size, int, 0444);
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> +
> +struct tmfifo;
> +
> +/* A flag to indicate TmFifo ready. */
> +static bool tmfifo_ready;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Spin lock. */
> +static DEFINE_SPINLOCK(tmfifo_spin_lock);

Generally speaking, it's nicer to write a driver in a way that avoids
global variables and make the flags and locks all members of a
device specific structure.

> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};

I suppose you did this to keep the driver simple, but it seems a
little inflexible
to only support two specific device types. Wouldn't we also want e.g. 9pfs
or virtio_blk in some configurations?

> +
> +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
> +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> +
> +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> +	(vdev)->tx_tail += (len); \
> +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
> +} while (0)
> +
> +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> +	(vdev)->tx_head += (len); \
> +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
> +} while (0)

It would be nicer to turn these into inline functions rather than macros.

> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	int irq[TM_IRQ_CNT];		/* irq numbers */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if
> configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> 0x01};
> +

Is a predefined MAC address better than a random one here?

For DT based systems, we tend to also call of_get_mac_address()
in order to allow setting a unique address from firmware.

> +/* Forward declaration. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
> +static void tmfifo_release_pkt(struct virtio_device *vdev,
> +			       struct tmfifo_vring *vring,
> +			       struct vring_desc **desc);

Try to avoid forward declarations by reordering the functions according
to how they get called.

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> +{
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}

Maybe using a request_threaded_irq() would be a better way to defer
the handler into IRQ context.

        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-25 15:57     ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:57 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

I definitely like the idea of using virtio-net and virtio-console here,
this is a great way of reusing the existing high-level drivers,
and i similar in concept (but also much simpler) to what we
have in drivers/misc/mic/ for another Linux-running machine that
can be a PCIe add-on card.

Have you also posted the other half of this driver? I'd like to see
how it all fits together.

A few style comments:

> +
> +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> +
> +#define TMFIFO_SET_FIELD(reg, mask, value) \
> +	((reg & ~mask) | FIELD_PREP(mask, value))

I think it would be nicer to use FIELD_GET/FIELD_PREP
in the code directly, and avoid adding extra wrappers around them.

> +/* Vring size. */
> +#define TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* Use a timer for house-keeping. */
> +static int tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static struct mutex tmfifo_lock;

Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the
initialization call.

> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> +module_param(tmfifo_vring_size, int, 0444);
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> +
> +struct tmfifo;
> +
> +/* A flag to indicate TmFifo ready. */
> +static bool tmfifo_ready;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Spin lock. */
> +static DEFINE_SPINLOCK(tmfifo_spin_lock);

Generally speaking, it's nicer to write a driver in a way that avoids
global variables and make the flags and locks all members of a
device specific structure.

> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};

I suppose you did this to keep the driver simple, but it seems a
little inflexible
to only support two specific device types. Wouldn't we also want e.g. 9pfs
or virtio_blk in some configurations?

> +
> +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
> +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> +
> +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> +	(vdev)->tx_tail += (len); \
> +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
> +} while (0)
> +
> +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> +	(vdev)->tx_head += (len); \
> +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
> +} while (0)

It would be nicer to turn these into inline functions rather than macros.

> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	int irq[TM_IRQ_CNT];		/* irq numbers */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if
> configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> 0x01};
> +

Is a predefined MAC address better than a random one here?

For DT based systems, we tend to also call of_get_mac_address()
in order to allow setting a unique address from firmware.

> +/* Forward declaration. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
> +static void tmfifo_release_pkt(struct virtio_device *vdev,
> +			       struct tmfifo_vring *vring,
> +			       struct vring_desc **desc);

Try to avoid forward declarations by reordering the functions according
to how they get called.

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> +{
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}

Maybe using a request_threaded_irq() would be a better way to defer
the handler into IRQ context.

        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-25 15:57     ` Arnd Bergmann
@ 2018-10-26 18:24       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-26 18:24 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Thanks Arnd for the comments! Please see the response inline.

- Liming

> -----Original Message-----
> From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:58 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> kernel@lists.infradead.org
> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> Soc
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > This commit adds the TmFifo driver for Mellanox BlueField Soc.
> > TmFifo is a shared FIFO which enables external host machine to
> > exchange data with the SoC via USB or PCIe. The driver is based on
> > virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> I definitely like the idea of using virtio-net and virtio-console here,
> this is a great way of reusing the existing high-level drivers,
> and i similar in concept (but also much simpler) to what we
> have in drivers/misc/mic/ for another Linux-running machine that
> can be a PCIe add-on card.
> 
> Have you also posted the other half of this driver? I'd like to see
> how it all fits together.

I'll add the (x86) host side driver into this patch series v5 as a separate commit.

> 
> A few style comments:
> 
> > +
> > +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> > +
> > +#define TMFIFO_SET_FIELD(reg, mask, value) \
> > +	((reg & ~mask) | FIELD_PREP(mask, value))
> 
> I think it would be nicer to use FIELD_GET/FIELD_PREP
> in the code directly, and avoid adding extra wrappers around them.

Will update it in patch v5.

> 
> > +/* Vring size. */
> > +#define TMFIFO_VRING_SIZE			1024
> > +
> > +/* Console Tx buffer size. */
> > +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> > +
> > +/* Use a timer for house-keeping. */
> > +static int tmfifo_timer_interval = HZ / 10;
> > +
> > +/* Global lock. */
> > +static struct mutex tmfifo_lock;
> 
> Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the
> initialization call.

Will update it in patch v5.

> 
> > +/* Virtio ring size. */
> > +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> > +module_param(tmfifo_vring_size, int, 0444);
> > +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> > +
> > +struct tmfifo;
> > +
> > +/* A flag to indicate TmFifo ready. */
> > +static bool tmfifo_ready;
> > +
> > +/* Virtual devices sharing the TM FIFO. */
> > +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> > +
> > +/* Spin lock. */
> > +static DEFINE_SPINLOCK(tmfifo_spin_lock);
> 
> Generally speaking, it's nicer to write a driver in a way that avoids
> global variables and make the flags and locks all members of a
> device specific structure.

Will update it in patch v5.

> 
> > +struct tmfifo_vdev {
> > +	struct virtio_device vdev;	/* virtual device */
> > +	u8 status;
> > +	u64 features;
> > +	union {				/* virtio config space */
> > +		struct virtio_console_config cons;
> > +		struct virtio_net_config net;
> > +	} config;
> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> > +	u8 *tx_buf;			/* tx buffer */
> > +	u32 tx_head;			/* tx buffer head */
> > +	u32 tx_tail;			/* tx buffer tail */
> > +};
> 
> I suppose you did this to keep the driver simple, but it seems a
> little inflexible
> to only support two specific device types. Wouldn't we also want e.g. 9pfs
> or virtio_blk in some configurations?

We could definitely add more when needed, which should be straightforward
due to the virtio framework. For now only network and console are supported
and have been verified.

> 
> > +
> > +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> > +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> > +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head))
> : \
> > +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> > +
> > +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> > +	(vdev)->tx_tail += (len); \
> > +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> > +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
> > +} while (0)
> > +
> > +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> > +	(vdev)->tx_head += (len); \
> > +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> > +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
> > +} while (0)
> 
> It would be nicer to turn these into inline functions rather than macros.

Will update it in patch v5.

> 
> > +/* TMFIFO device structure */
> > +struct tmfifo {
> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> > +	struct platform_device *pdev;	/* platform device */
> > +	struct mutex lock;
> > +	void __iomem *rx_base;		/* mapped register base */
> > +	void __iomem *tx_base;		/* mapped register base */
> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> > +	unsigned long pend_events;	/* pending bits for deferred process
> */
> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
> > +	struct work_struct work;	/* work struct for deferred process
> */
> > +	struct timer_list timer;	/* keepalive timer */
> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> > +};
> > +
> > +union tmfifo_msg_hdr {
> > +	struct {
> > +		u8 type;		/* message type */
> > +		__be16 len;		/* payload length */
> > +		u8 unused[5];		/* reserved, set to 0 */
> > +	} __packed;
> > +	u64 data;
> > +};
> > +
> > +/*
> > + * Default MAC.
> > + * This MAC address will be read from EFI persistent variable if
> > configured.
> > + * It can also be reconfigured with standard Linux tools.
> > + */
> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> > 0x01};
> > +
> 
> Is a predefined MAC address better than a random one here?
> 
> For DT based systems, we tend to also call of_get_mac_address()
> in order to allow setting a unique address from firmware.

A predefined default MAC address is simpler in this case, which makes 
DHCP or PXE boot easier in development environment. 

For production, the MAC address is stored in persistent UEFI variable 
on the eeprom, which is read in function tmfifo_get_cfg_mac() which 
calls efi.get_variable() to get the MAC address.

> 
> > +/* Forward declaration. */
> > +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
> > +static void tmfifo_release_pkt(struct virtio_device *vdev,
> > +			       struct tmfifo_vring *vring,
> > +			       struct vring_desc **desc);
> 
> Try to avoid forward declarations by reordering the functions according
> to how they get called.

Will update it in patch v5.

> 
> > +
> > +/* Interrupt handler. */
> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > +{
> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> > +	struct tmfifo *fifo = dev_id - i;
> > +
> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > +		schedule_work(&fifo->work);
> > +
> > +	return IRQ_HANDLED;
> > +}
> 
> Maybe using a request_threaded_irq() would be a better way to defer
> the handler into IRQ context.

Not sure if I understand this comment correctly... In this case, the implemented handler 
has some mutex_lock() used, which tries to make the logic simple since multiple services 
(network & console) are sharing the same fifo. Thus schedule_work() is used.

> 
>         Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-26 18:24       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-26 18:24 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Arnd for the comments! Please see the response inline.

- Liming

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:58 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> Soc
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > This commit adds the TmFifo driver for Mellanox BlueField Soc.
> > TmFifo is a shared FIFO which enables external host machine to
> > exchange data with the SoC via USB or PCIe. The driver is based on
> > virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> I definitely like the idea of using virtio-net and virtio-console here,
> this is a great way of reusing the existing high-level drivers,
> and i similar in concept (but also much simpler) to what we
> have in drivers/misc/mic/ for another Linux-running machine that
> can be a PCIe add-on card.
> 
> Have you also posted the other half of this driver? I'd like to see
> how it all fits together.

I'll add the (x86) host side driver into this patch series v5 as a separate commit.

> 
> A few style comments:
> 
> > +
> > +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> > +
> > +#define TMFIFO_SET_FIELD(reg, mask, value) \
> > +	((reg & ~mask) | FIELD_PREP(mask, value))
> 
> I think it would be nicer to use FIELD_GET/FIELD_PREP
> in the code directly, and avoid adding extra wrappers around them.

Will update it in patch v5.

> 
> > +/* Vring size. */
> > +#define TMFIFO_VRING_SIZE			1024
> > +
> > +/* Console Tx buffer size. */
> > +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> > +
> > +/* Use a timer for house-keeping. */
> > +static int tmfifo_timer_interval = HZ / 10;
> > +
> > +/* Global lock. */
> > +static struct mutex tmfifo_lock;
> 
> Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the
> initialization call.

Will update it in patch v5.

> 
> > +/* Virtio ring size. */
> > +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> > +module_param(tmfifo_vring_size, int, 0444);
> > +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> > +
> > +struct tmfifo;
> > +
> > +/* A flag to indicate TmFifo ready. */
> > +static bool tmfifo_ready;
> > +
> > +/* Virtual devices sharing the TM FIFO. */
> > +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> > +
> > +/* Spin lock. */
> > +static DEFINE_SPINLOCK(tmfifo_spin_lock);
> 
> Generally speaking, it's nicer to write a driver in a way that avoids
> global variables and make the flags and locks all members of a
> device specific structure.

Will update it in patch v5.

> 
> > +struct tmfifo_vdev {
> > +	struct virtio_device vdev;	/* virtual device */
> > +	u8 status;
> > +	u64 features;
> > +	union {				/* virtio config space */
> > +		struct virtio_console_config cons;
> > +		struct virtio_net_config net;
> > +	} config;
> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> > +	u8 *tx_buf;			/* tx buffer */
> > +	u32 tx_head;			/* tx buffer head */
> > +	u32 tx_tail;			/* tx buffer tail */
> > +};
> 
> I suppose you did this to keep the driver simple, but it seems a
> little inflexible
> to only support two specific device types. Wouldn't we also want e.g. 9pfs
> or virtio_blk in some configurations?

We could definitely add more when needed, which should be straightforward
due to the virtio framework. For now only network and console are supported
and have been verified. 

> 
> > +
> > +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> > +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> > +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head))
> : \
> > +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> > +
> > +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> > +	(vdev)->tx_tail += (len); \
> > +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> > +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
> > +} while (0)
> > +
> > +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> > +	(vdev)->tx_head += (len); \
> > +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> > +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
> > +} while (0)
> 
> It would be nicer to turn these into inline functions rather than macros.

Will update it in patch v5.

> 
> > +/* TMFIFO device structure */
> > +struct tmfifo {
> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> > +	struct platform_device *pdev;	/* platform device */
> > +	struct mutex lock;
> > +	void __iomem *rx_base;		/* mapped register base */
> > +	void __iomem *tx_base;		/* mapped register base */
> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> > +	unsigned long pend_events;	/* pending bits for deferred process
> */
> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
> > +	struct work_struct work;	/* work struct for deferred process
> */
> > +	struct timer_list timer;	/* keepalive timer */
> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> > +};
> > +
> > +union tmfifo_msg_hdr {
> > +	struct {
> > +		u8 type;		/* message type */
> > +		__be16 len;		/* payload length */
> > +		u8 unused[5];		/* reserved, set to 0 */
> > +	} __packed;
> > +	u64 data;
> > +};
> > +
> > +/*
> > + * Default MAC.
> > + * This MAC address will be read from EFI persistent variable if
> > configured.
> > + * It can also be reconfigured with standard Linux tools.
> > + */
> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> > 0x01};
> > +
> 
> Is a predefined MAC address better than a random one here?
> 
> For DT based systems, we tend to also call of_get_mac_address()
> in order to allow setting a unique address from firmware.

A predefined default MAC address is simpler in this case, which makes 
DHCP or PXE boot easier in development environment. 

For production, the MAC address is stored in persistent UEFI variable 
on the eeprom, which is read in function tmfifo_get_cfg_mac() which 
calls efi.get_variable() to get the MAC address.

> 
> > +/* Forward declaration. */
> > +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
> > +static void tmfifo_release_pkt(struct virtio_device *vdev,
> > +			       struct tmfifo_vring *vring,
> > +			       struct vring_desc **desc);
> 
> Try to avoid forward declarations by reordering the functions according
> to how they get called.

Will update it in patch v5.

> 
> > +
> > +/* Interrupt handler. */
> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > +{
> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> > +	struct tmfifo *fifo = dev_id - i;
> > +
> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > +		schedule_work(&fifo->work);
> > +
> > +	return IRQ_HANDLED;
> > +}
> 
> Maybe using a request_threaded_irq() would be a better way to defer
> the handler into IRQ context.

Not sure if I understand this comment correctly... In this case, the implemented handler 
has some mutex_lock() used, which tries to make the logic simple since multiple services 
(network & console) are sharing the same fifo. Thus schedule_work() is used.

> 
>         Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-26 18:24       ` Liming Sun
@ 2018-10-26 18:35         ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-26 18:35 UTC (permalink / raw)
  To: Liming Sun
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

On 10/26/18, Liming Sun <lsun@mellanox.com> wrote:
>> -----Original Message-----
>> From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
>> Behalf Of Arnd Bergmann
>> Sent: Thursday, October 25, 2018 11:58 AM
>> To: Liming Sun <lsun@mellanox.com>
>> Cc: Olof Johansson <olof@lixom.net>; David Woods
>> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
>> soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
>> kernel@lists.infradead.org
>> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
>> Soc
>>
>> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
>> > +struct tmfifo_vdev {
>> > +	struct virtio_device vdev;	/* virtual device */
>> > +	u8 status;
>> > +	u64 features;
>> > +	union {				/* virtio config space */
>> > +		struct virtio_console_config cons;
>> > +		struct virtio_net_config net;
>> > +	} config;
>> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
>> > +	u8 *tx_buf;			/* tx buffer */
>> > +	u32 tx_head;			/* tx buffer head */
>> > +	u32 tx_tail;			/* tx buffer tail */
>> > +};
>>
>> I suppose you did this to keep the driver simple, but it seems a
>> little inflexible
>> to only support two specific device types. Wouldn't we also want e.g.
>> 9pfs
>> or virtio_blk in some configurations?
>
> We could definitely add more when needed, which should be straightforward
> due to the virtio framework. For now only network and console are supported
> and have been verified.

Wouldn't that require a new PCI ID to have the driver on the host
side match what this side does? I guess I'll see when you post the
other driver.

>> > +/* TMFIFO device structure */
>> > +struct tmfifo {
>> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
>> > +	struct platform_device *pdev;	/* platform device */
>> > +	struct mutex lock;
>> > +	void __iomem *rx_base;		/* mapped register base */
>> > +	void __iomem *tx_base;		/* mapped register base */
>> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
>> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
>> > +	unsigned long pend_events;	/* pending bits for deferred process
>> */
>> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
>> > +	struct work_struct work;	/* work struct for deferred process
>> */
>> > +	struct timer_list timer;	/* keepalive timer */
>> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
>> > +};
>> > +
>> > +union tmfifo_msg_hdr {
>> > +	struct {
>> > +		u8 type;		/* message type */
>> > +		__be16 len;		/* payload length */
>> > +		u8 unused[5];		/* reserved, set to 0 */
>> > +	} __packed;
>> > +	u64 data;
>> > +};
>> > +
>> > +/*
>> > + * Default MAC.
>> > + * This MAC address will be read from EFI persistent variable if
>> > configured.
>> > + * It can also be reconfigured with standard Linux tools.
>> > + */
>> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
>> > 0x01};
>> > +
>>
>> Is a predefined MAC address better than a random one here?
>>
>> For DT based systems, we tend to also call of_get_mac_address()
>> in order to allow setting a unique address from firmware.
>
> A predefined default MAC address is simpler in this case, which makes
> DHCP or PXE boot easier in development environment.
>
> For production, the MAC address is stored in persistent UEFI variable
> on the eeprom, which is read in function tmfifo_get_cfg_mac() which
> calls efi.get_variable() to get the MAC address.

Ok, fair enough. Generally speaking the recommended way of doing
this is to update the DT properties from eeprom when a network
driver has no way to store the mac address itself, but I suppose
you always have UEFI anyway, and this also makes it work in
the same way across both DT and ACPI.

>> > +/* Interrupt handler. */
>> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
>> > +{
>> > +	int i = (uintptr_t)dev_id % sizeof(void *);
>> > +	struct tmfifo *fifo = dev_id - i;
>> > +
>> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
>> > +		schedule_work(&fifo->work);
>> > +
>> > +	return IRQ_HANDLED;
>> > +}
>>
>> Maybe using a request_threaded_irq() would be a better way to defer
>> the handler into IRQ context.
>
> Not sure if I understand this comment correctly... In this case, the
> implemented handler
> has some mutex_lock() used, which tries to make the logic simple since
> multiple services
> (network & console) are sharing the same fifo. Thus schedule_work() is
> used.

schedule_work() and threaded IRQs are just two different ways of deferring
into process context where you can do the mutex_lock(). The effect is
almost the same, but work queues can be delayed for a substantial
amount of time depending on what other work functions have been
queued at the same time, and request_threaded_irq() is the more normal
way of doing this specifically for an IRQ handler, probably saving a couple
of lines of source code.

If you have any kind of real-time requirement, you can also assign a
specific realtime priority to that interrupt thread.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-26 18:35         ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-26 18:35 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/26/18, Liming Sun <lsun@mellanox.com> wrote:
>> -----Original Message-----
>> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
>> Behalf Of Arnd Bergmann
>> Sent: Thursday, October 25, 2018 11:58 AM
>> To: Liming Sun <lsun@mellanox.com>
>> Cc: Olof Johansson <olof@lixom.net>; David Woods
>> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
>> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
>> kernel at lists.infradead.org
>> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
>> Soc
>>
>> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
>> > +struct tmfifo_vdev {
>> > +	struct virtio_device vdev;	/* virtual device */
>> > +	u8 status;
>> > +	u64 features;
>> > +	union {				/* virtio config space */
>> > +		struct virtio_console_config cons;
>> > +		struct virtio_net_config net;
>> > +	} config;
>> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
>> > +	u8 *tx_buf;			/* tx buffer */
>> > +	u32 tx_head;			/* tx buffer head */
>> > +	u32 tx_tail;			/* tx buffer tail */
>> > +};
>>
>> I suppose you did this to keep the driver simple, but it seems a
>> little inflexible
>> to only support two specific device types. Wouldn't we also want e.g.
>> 9pfs
>> or virtio_blk in some configurations?
>
> We could definitely add more when needed, which should be straightforward
> due to the virtio framework. For now only network and console are supported
> and have been verified.

Wouldn't that require a new PCI ID to have the driver on the host
side match what this side does? I guess I'll see when you post the
other driver.

>> > +/* TMFIFO device structure */
>> > +struct tmfifo {
>> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
>> > +	struct platform_device *pdev;	/* platform device */
>> > +	struct mutex lock;
>> > +	void __iomem *rx_base;		/* mapped register base */
>> > +	void __iomem *tx_base;		/* mapped register base */
>> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
>> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
>> > +	unsigned long pend_events;	/* pending bits for deferred process
>> */
>> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
>> > +	struct work_struct work;	/* work struct for deferred process
>> */
>> > +	struct timer_list timer;	/* keepalive timer */
>> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
>> > +};
>> > +
>> > +union tmfifo_msg_hdr {
>> > +	struct {
>> > +		u8 type;		/* message type */
>> > +		__be16 len;		/* payload length */
>> > +		u8 unused[5];		/* reserved, set to 0 */
>> > +	} __packed;
>> > +	u64 data;
>> > +};
>> > +
>> > +/*
>> > + * Default MAC.
>> > + * This MAC address will be read from EFI persistent variable if
>> > configured.
>> > + * It can also be reconfigured with standard Linux tools.
>> > + */
>> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
>> > 0x01};
>> > +
>>
>> Is a predefined MAC address better than a random one here?
>>
>> For DT based systems, we tend to also call of_get_mac_address()
>> in order to allow setting a unique address from firmware.
>
> A predefined default MAC address is simpler in this case, which makes
> DHCP or PXE boot easier in development environment.
>
> For production, the MAC address is stored in persistent UEFI variable
> on the eeprom, which is read in function tmfifo_get_cfg_mac() which
> calls efi.get_variable() to get the MAC address.

Ok, fair enough. Generally speaking the recommended way of doing
this is to update the DT properties from eeprom when a network
driver has no way to store the mac address itself, but I suppose
you always have UEFI anyway, and this also makes it work in
the same way across both DT and ACPI.

>> > +/* Interrupt handler. */
>> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
>> > +{
>> > +	int i = (uintptr_t)dev_id % sizeof(void *);
>> > +	struct tmfifo *fifo = dev_id - i;
>> > +
>> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
>> > +		schedule_work(&fifo->work);
>> > +
>> > +	return IRQ_HANDLED;
>> > +}
>>
>> Maybe using a request_threaded_irq() would be a better way to defer
>> the handler into IRQ context.
>
> Not sure if I understand this comment correctly... In this case, the
> implemented handler
> has some mutex_lock() used, which tries to make the logic simple since
> multiple services
> (network & console) are sharing the same fifo. Thus schedule_work() is
> used.

schedule_work() and threaded IRQs are just two different ways of deferring
into process context where you can do the mutex_lock(). The effect is
almost the same, but work queues can be delayed for a substantial
amount of time depending on what other work functions have been
queued at the same time, and request_threaded_irq() is the more normal
way of doing this specifically for an IRQ handler, probably saving a couple
of lines of source code.

If you have any kind of real-time requirement, you can also assign a
specific realtime priority to that interrupt thread.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-25 15:38     ` Arnd Bergmann
@ 2018-10-26 19:18       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-26 19:18 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Thanks Arnd for the comments!  Please see my response inline.

- Liming

> -----Original Message-----
> From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:38 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> kernel@lists.infradead.org
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > This commit introduces config option for Mellanox BlueField SoC,
> > which can be used to build the SoC specific drivers, and enables
> > it by default in configs/defconfig.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  arch/arm64/Kconfig.platforms | 6 ++++++
> >  arch/arm64/configs/defconfig | 1 +
> >  2 files changed, 7 insertions(+)
> 
> Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> 
> I'm sorry for missing your series in the past. We should definitely merge
> the platform support soon. Do you also have device tree files for reference
> systems or even production hardware?

We have obsoleted the device tree, and mainly support ACPI now
on Reference/Production HW. Below is the TMFIFO definition in the ACPI
DSDT table.

    // RShim TMFIFO
    Device(RSH0) {
      Name(_HID, "MLNXBF01")
      Name(_UID, Zero)
      Name(_CCA, 1)
      Name(_CRS, ResourceTemplate() {
        Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
        Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
        Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
          { BF1_RSH0_TM_HTT_LWM_INT,
            BF1_RSH0_TM_HTT_HWM_INT,
            BF1_RSH0_TM_TTH_LWM_INT,
            BF1_RSH0_TM_TTH_HWM_INT
          }
      })
    }

The full ACPI implementation can be found in the 1.0 release:
http://www.mellanox.com/downloads/BlueField/BlueField-1.0.0.10521/BlueField-1.0.0.10521.tar.xz
Inside this tarball, we can see the "src/edk2/" directory which has the edk2 patch file including all the ACPI implementation.

> 
> I need to have a separate look at the fifo driver.
> 
> Unfortunately, you have sent these patches during the merge window,
> which is the time during which we don't pick up new work. Let's plan
> to pick these up after 4.20-rc1, and please resend to arm@kernel.org
> if we manage to forget about it again.
> 
>       Arnd

Thanks for the information. I'll resend it to make sure not to miss the date.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-26 19:18       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-26 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Arnd for the comments!  Please see my response inline.

- Liming

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:38 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > This commit introduces config option for Mellanox BlueField SoC,
> > which can be used to build the SoC specific drivers, and enables
> > it by default in configs/defconfig.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  arch/arm64/Kconfig.platforms | 6 ++++++
> >  arch/arm64/configs/defconfig | 1 +
> >  2 files changed, 7 insertions(+)
> 
> Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> 
> I'm sorry for missing your series in the past. We should definitely merge
> the platform support soon. Do you also have device tree files for reference
> systems or even production hardware?

We have obsoleted the device tree, and mainly support ACPI now
on Reference/Production HW. Below is the TMFIFO definition in the ACPI
DSDT table.

    // RShim TMFIFO
    Device(RSH0) {
      Name(_HID, "MLNXBF01")
      Name(_UID, Zero)
      Name(_CCA, 1)
      Name(_CRS, ResourceTemplate() {
        Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
        Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
        Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
          { BF1_RSH0_TM_HTT_LWM_INT,
            BF1_RSH0_TM_HTT_HWM_INT,
            BF1_RSH0_TM_TTH_LWM_INT,
            BF1_RSH0_TM_TTH_HWM_INT
          }
      })
    }

The full ACPI implementation can be found in the 1.0 release:
http://www.mellanox.com/downloads/BlueField/BlueField-1.0.0.10521/BlueField-1.0.0.10521.tar.xz
Inside this tarball, we can see the "src/edk2/" directory which has the edk2 patch file including all the ACPI implementation.

> 
> I need to have a separate look at the fifo driver.
> 
> Unfortunately, you have sent these patches during the merge window,
> which is the time during which we don't pick up new work. Let's plan
> to pick these up after 4.20-rc1, and please resend to arm at kernel.org
> if we manage to forget about it again.
> 
>       Arnd

Thanks for the information. I'll resend it to make sure not to miss the date.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-26 19:18       ` Liming Sun
@ 2018-10-26 19:32         ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-26 19:32 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Fri, Oct 26, 2018 at 9:18 PM Liming Sun <lsun@mellanox.com> wrote:
> > -----Original Message-----
> > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > Behalf Of Arnd Bergmann
> > Sent: Thursday, October 25, 2018 11:38 AM
> > To: Liming Sun <lsun@mellanox.com>
> > Cc: Olof Johansson <olof@lixom.net>; David Woods
> > <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> > soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> > kernel@lists.infradead.org
> > Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> >
> > On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > > This commit introduces config option for Mellanox BlueField SoC,
> > > which can be used to build the SoC specific drivers, and enables
> > > it by default in configs/defconfig.
> > >
> > > Reviewed-by: David Woods <dwoods@mellanox.com>
> > > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > > ---
> > >  arch/arm64/Kconfig.platforms | 6 ++++++
> > >  arch/arm64/configs/defconfig | 1 +
> > >  2 files changed, 7 insertions(+)
> >
> > Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> >
> > I'm sorry for missing your series in the past. We should definitely merge
> > the platform support soon. Do you also have device tree files for reference
> > systems or even production hardware?
>
> We have obsoleted the device tree, and mainly support ACPI now
> on Reference/Production HW. Below is the TMFIFO definition in the ACPI
> DSDT table.
>
>     // RShim TMFIFO
>     Device(RSH0) {
>       Name(_HID, "MLNXBF01")
>       Name(_UID, Zero)
>       Name(_CCA, 1)
>       Name(_CRS, ResourceTemplate() {
>         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
>         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
>         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
>           { BF1_RSH0_TM_HTT_LWM_INT,
>             BF1_RSH0_TM_HTT_HWM_INT,
>             BF1_RSH0_TM_TTH_LWM_INT,
>             BF1_RSH0_TM_TTH_HWM_INT
>           }
>       })
>     }
>
> The full ACPI implementation can be found in the 1.0 release:
> http://www.mellanox.com/downloads/BlueField/BlueField-1.0.0.10521/BlueField-1.0.0.10521.tar.xz
> Inside this tarball, we can see the "src/edk2/" directory which has the edk2 patch file including all the ACPI implementation.

It would be nice if you could still include the dts sources in the submission.
Since this is not a classic server hardware, DT is probably more suitable
here, and there are likely use cases that won't work with ACPI, so it's
good to have a starting point for your users if they need to override
the ACPI tables with a DT, or build systems with a simpler boot loader.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-26 19:32         ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-26 19:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 26, 2018 at 9:18 PM Liming Sun <lsun@mellanox.com> wrote:
> > -----Original Message-----
> > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > Behalf Of Arnd Bergmann
> > Sent: Thursday, October 25, 2018 11:38 AM
> > To: Liming Sun <lsun@mellanox.com>
> > Cc: Olof Johansson <olof@lixom.net>; David Woods
> > <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> > soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> > kernel at lists.infradead.org
> > Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> >
> > On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > > This commit introduces config option for Mellanox BlueField SoC,
> > > which can be used to build the SoC specific drivers, and enables
> > > it by default in configs/defconfig.
> > >
> > > Reviewed-by: David Woods <dwoods@mellanox.com>
> > > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > > ---
> > >  arch/arm64/Kconfig.platforms | 6 ++++++
> > >  arch/arm64/configs/defconfig | 1 +
> > >  2 files changed, 7 insertions(+)
> >
> > Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> >
> > I'm sorry for missing your series in the past. We should definitely merge
> > the platform support soon. Do you also have device tree files for reference
> > systems or even production hardware?
>
> We have obsoleted the device tree, and mainly support ACPI now
> on Reference/Production HW. Below is the TMFIFO definition in the ACPI
> DSDT table.
>
>     // RShim TMFIFO
>     Device(RSH0) {
>       Name(_HID, "MLNXBF01")
>       Name(_UID, Zero)
>       Name(_CCA, 1)
>       Name(_CRS, ResourceTemplate() {
>         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
>         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
>         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
>           { BF1_RSH0_TM_HTT_LWM_INT,
>             BF1_RSH0_TM_HTT_HWM_INT,
>             BF1_RSH0_TM_TTH_LWM_INT,
>             BF1_RSH0_TM_TTH_HWM_INT
>           }
>       })
>     }
>
> The full ACPI implementation can be found in the 1.0 release:
> http://www.mellanox.com/downloads/BlueField/BlueField-1.0.0.10521/BlueField-1.0.0.10521.tar.xz
> Inside this tarball, we can see the "src/edk2/" directory which has the edk2 patch file including all the ACPI implementation.

It would be nice if you could still include the dts sources in the submission.
Since this is not a classic server hardware, DT is probably more suitable
here, and there are likely use cases that won't work with ACPI, so it's
good to have a starting point for your users if they need to override
the ACPI tables with a DT, or build systems with a simpler boot loader.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-25 15:32     ` Arnd Bergmann
@ 2018-10-26 19:36       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-26 19:36 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Thanks Arnd for the comments! Please see the response inline.

- Liming

> -----Original Message-----
> From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:33 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> kernel@lists.infradead.org
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox
> BlueField SoC
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > Add devicetree bindings for the TmFifo which is found on Mellanox
> > BlueField SoCs.
> >
> > Reviewed-by: Rob Herring <robh@kernel.org>
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23
> > ++++++++++++++++++++++
> >  1 file changed, 23 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> >
> > diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > new file mode 100644
> > index 0000000..8a13fa6
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > @@ -0,0 +1,23 @@
> > +* Mellanox BlueField SoC TmFifo
> > +
> > +BlueField TmFifo provides a shared FIFO between the target and the
> > +external host machine, which can be accessed by external host via
> > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > +to implement virtual console and network interface based on the virtio
> > +framework.
> > +
> > +Required properties:
> > +
> > +- compatible:	Should be "mellanox,bf-tmfifo"
> > +- reg:		Physical base address and length of Rx/Tx block
> > +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> > mark
> > +		Tx low water mark, Tx high water mark respectively.
> 
> 
> This sounds like it might fit into the mailbox subsystem, and perhaps
> it should use the mailbox DT bindings. Have you had a look at that?

This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
Like the response to patch 2/4, ACPI is actually used now instead of device tree.
The TMFIFO definition in the ACPI DSDT table would be something like below.

    // RShim TMFIFO
    Device(RSH0) {
      Name(_HID, "MLNXBF01")
      Name(_UID, Zero)
      Name(_CCA, 1)
      Name(_CRS, ResourceTemplate() {
        Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
        Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
        Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
          { BF1_RSH0_TM_HTT_LWM_INT,
            BF1_RSH0_TM_HTT_HWM_INT,
            BF1_RSH0_TM_TTH_LWM_INT,
            BF1_RSH0_TM_TTH_HWM_INT
          }
      })
    }

Any suggestion how it should be added into Linux Documentation, or maybe I
should just remove this commit from this patch series?

As for the sub-component of this driver, the "soc" might be a better fit than the mailbox 
for some reasons. It's a communication between external machines and the SoC via 
USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features, 
like pushing boot stream, don't communicate with the ARM core. The boot stream 
is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.
> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-10-26 19:36       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-26 19:36 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Arnd for the comments! Please see the response inline.

- Liming

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:33 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox
> BlueField SoC
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > Add devicetree bindings for the TmFifo which is found on Mellanox
> > BlueField SoCs.
> >
> > Reviewed-by: Rob Herring <robh@kernel.org>
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23
> > ++++++++++++++++++++++
> >  1 file changed, 23 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> >
> > diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > new file mode 100644
> > index 0000000..8a13fa6
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > @@ -0,0 +1,23 @@
> > +* Mellanox BlueField SoC TmFifo
> > +
> > +BlueField TmFifo provides a shared FIFO between the target and the
> > +external host machine, which can be accessed by external host via
> > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > +to implement virtual console and network interface based on the virtio
> > +framework.
> > +
> > +Required properties:
> > +
> > +- compatible:	Should be "mellanox,bf-tmfifo"
> > +- reg:		Physical base address and length of Rx/Tx block
> > +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> > mark
> > +		Tx low water mark, Tx high water mark respectively.
> 
> 
> This sounds like it might fit into the mailbox subsystem, and perhaps
> it should use the mailbox DT bindings. Have you had a look at that?

This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
Like the response to patch 2/4, ACPI is actually used now instead of device tree.
The TMFIFO definition in the ACPI DSDT table would be something like below.

    // RShim TMFIFO
    Device(RSH0) {
      Name(_HID, "MLNXBF01")
      Name(_UID, Zero)
      Name(_CCA, 1)
      Name(_CRS, ResourceTemplate() {
        Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
        Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
        Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
          { BF1_RSH0_TM_HTT_LWM_INT,
            BF1_RSH0_TM_HTT_HWM_INT,
            BF1_RSH0_TM_TTH_LWM_INT,
            BF1_RSH0_TM_TTH_HWM_INT
          }
      })
    }

Any suggestion how it should be added into Linux Documentation, or maybe I
should just remove this commit from this patch series?

As for the sub-component of this driver, the "soc" might be a better fit than the mailbox 
for some reasons. It's a communication between external machines and the SoC via 
USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features, 
like pushing boot stream, don't communicate with the ARM core. The boot stream 
is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.
> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-26 19:36       ` Liming Sun
@ 2018-10-26 20:33         ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-26 20:33 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > -----Original Message-----
> > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > > --- /dev/null
> > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > @@ -0,0 +1,23 @@
> > > +* Mellanox BlueField SoC TmFifo
> > > +
> > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > +external host machine, which can be accessed by external host via
> > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > +to implement virtual console and network interface based on the virtio
> > > +framework.
> > > +
> > > +Required properties:
> > > +
> > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > +- reg:             Physical base address and length of Rx/Tx block
> > > +- interrupts:      The interrupt number of Rx low water mark, Rx high water
> > > mark
> > > +           Tx low water mark, Tx high water mark respectively.
> >
> >
> > This sounds like it might fit into the mailbox subsystem, and perhaps
> > it should use the mailbox DT bindings. Have you had a look at that?
>
> This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> Like the response to patch 2/4, ACPI is actually used now instead of device tree.
> The TMFIFO definition in the ACPI DSDT table would be something like below.
>
>     // RShim TMFIFO
>     Device(RSH0) {
>       Name(_HID, "MLNXBF01")
>       Name(_UID, Zero)
>       Name(_CCA, 1)
>       Name(_CRS, ResourceTemplate() {
>         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
>         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
>         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
>           { BF1_RSH0_TM_HTT_LWM_INT,
>             BF1_RSH0_TM_HTT_HWM_INT,
>             BF1_RSH0_TM_TTH_LWM_INT,
>             BF1_RSH0_TM_TTH_HWM_INT
>           }
>       })
>     }
>
> Any suggestion how it should be added into Linux Documentation, or maybe I
> should just remove this commit from this patch series?

Maybe the best way here would be to not use ACPI for the case
where BlueField is integrated into a PCIe endpoint, since ACPI is
not as flexible here and generally relies on having an SBSA
compliant hardware that you no longer have if you require
random platform devices for booting from and for your console.

For the case where a BlueField SoC is used in a standalone system,
having ACPI makes more sense, as that lets you install Red Hat
Linux or other operating systems that rely on SBBR and SBSA.

> As for the sub-component of this driver, the "soc" might be a better fit than the mailbox
> for some reasons. It's a communication between external machines and the SoC via
> USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features,
> like pushing boot stream, don't communicate with the ARM core. The boot stream
> is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.

Right, the drivers/mailbox subsystem was not the right idea here,
I noticed that myself after actually reading the driver. Drivers/soc
may also not be the best fit, since this is not really about it being
a SoC, but rather a way to encapsulate virtual devices. The
mic driver I mentioned is in drivers/misc, but I don't like to add stuff
there if we can avoid it.

drivers/virtio, drivers/bus or drivers/mfd might also be an option that
could fit better than drivers/soc, or you could have your own subdir
below drivers/ as some others do. Finally, drivers/platform/mellanox
might be a reasonable choice, and it would let you keep both sides
of the driver in one place.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-10-26 20:33         ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-26 20:33 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > -----Original Message-----
> > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > > --- /dev/null
> > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > @@ -0,0 +1,23 @@
> > > +* Mellanox BlueField SoC TmFifo
> > > +
> > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > +external host machine, which can be accessed by external host via
> > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > +to implement virtual console and network interface based on the virtio
> > > +framework.
> > > +
> > > +Required properties:
> > > +
> > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > +- reg:             Physical base address and length of Rx/Tx block
> > > +- interrupts:      The interrupt number of Rx low water mark, Rx high water
> > > mark
> > > +           Tx low water mark, Tx high water mark respectively.
> >
> >
> > This sounds like it might fit into the mailbox subsystem, and perhaps
> > it should use the mailbox DT bindings. Have you had a look at that?
>
> This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> Like the response to patch 2/4, ACPI is actually used now instead of device tree.
> The TMFIFO definition in the ACPI DSDT table would be something like below.
>
>     // RShim TMFIFO
>     Device(RSH0) {
>       Name(_HID, "MLNXBF01")
>       Name(_UID, Zero)
>       Name(_CCA, 1)
>       Name(_CRS, ResourceTemplate() {
>         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
>         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
>         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
>           { BF1_RSH0_TM_HTT_LWM_INT,
>             BF1_RSH0_TM_HTT_HWM_INT,
>             BF1_RSH0_TM_TTH_LWM_INT,
>             BF1_RSH0_TM_TTH_HWM_INT
>           }
>       })
>     }
>
> Any suggestion how it should be added into Linux Documentation, or maybe I
> should just remove this commit from this patch series?

Maybe the best way here would be to not use ACPI for the case
where BlueField is integrated into a PCIe endpoint, since ACPI is
not as flexible here and generally relies on having an SBSA
compliant hardware that you no longer have if you require
random platform devices for booting from and for your console.

For the case where a BlueField SoC is used in a standalone system,
having ACPI makes more sense, as that lets you install Red Hat
Linux or other operating systems that rely on SBBR and SBSA.

> As for the sub-component of this driver, the "soc" might be a better fit than the mailbox
> for some reasons. It's a communication between external machines and the SoC via
> USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features,
> like pushing boot stream, don't communicate with the ARM core. The boot stream
> is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.

Right, the drivers/mailbox subsystem was not the right idea here,
I noticed that myself after actually reading the driver. Drivers/soc
may also not be the best fit, since this is not really about it being
a SoC, but rather a way to encapsulate virtual devices. The
mic driver I mentioned is in drivers/misc, but I don't like to add stuff
there if we can avoid it.

drivers/virtio, drivers/bus or drivers/mfd might also be an option that
could fit better than drivers/soc, or you could have your own subdir
below drivers/ as some others do. Finally, drivers/platform/mellanox
might be a reasonable choice, and it would let you keep both sides
of the driver in one place.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-26 18:35         ` Arnd Bergmann
@ 2018-10-29 14:17           ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 14:17 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Thanks. Please see my response inline.

> -----Original Message-----
> From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Friday, October 26, 2018 2:35 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> kernel@lists.infradead.org
> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> Soc
> 
> On 10/26/18, Liming Sun <lsun@mellanox.com> wrote:
> >> -----Original Message-----
> >> From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> >> Behalf Of Arnd Bergmann
> >> Sent: Thursday, October 25, 2018 11:58 AM
> >> To: Liming Sun <lsun@mellanox.com>
> >> Cc: Olof Johansson <olof@lixom.net>; David Woods
> >> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>;
> arm-
> >> soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> >> kernel@lists.infradead.org
> >> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> >> Soc
> >>
> >> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> >> > +struct tmfifo_vdev {
> >> > +	struct virtio_device vdev;	/* virtual device */
> >> > +	u8 status;
> >> > +	u64 features;
> >> > +	union {				/* virtio config space */
> >> > +		struct virtio_console_config cons;
> >> > +		struct virtio_net_config net;
> >> > +	} config;
> >> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> >> > +	u8 *tx_buf;			/* tx buffer */
> >> > +	u32 tx_head;			/* tx buffer head */
> >> > +	u32 tx_tail;			/* tx buffer tail */
> >> > +};
> >>
> >> I suppose you did this to keep the driver simple, but it seems a
> >> little inflexible
> >> to only support two specific device types. Wouldn't we also want e.g.
> >> 9pfs
> >> or virtio_blk in some configurations?
> >
> > We could definitely add more when needed, which should be
> straightforward
> > due to the virtio framework. For now only network and console are
> supported
> > and have been verified.
> 
> Wouldn't that require a new PCI ID to have the driver on the host
> side match what this side does? I guess I'll see when you post the
> other driver.

Yes, the PCI ID is in the host side driver which will be included in patch v5.

> 
> >> > +/* TMFIFO device structure */
> >> > +struct tmfifo {
> >> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> >> > +	struct platform_device *pdev;	/* platform device */
> >> > +	struct mutex lock;
> >> > +	void __iomem *rx_base;		/* mapped register base */
> >> > +	void __iomem *tx_base;		/* mapped register base */
> >> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> >> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> >> > +	unsigned long pend_events;	/* pending bits for deferred process
> >> */
> >> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
> >> > +	struct work_struct work;	/* work struct for deferred process
> >> */
> >> > +	struct timer_list timer;	/* keepalive timer */
> >> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> >> > +};
> >> > +
> >> > +union tmfifo_msg_hdr {
> >> > +	struct {
> >> > +		u8 type;		/* message type */
> >> > +		__be16 len;		/* payload length */
> >> > +		u8 unused[5];		/* reserved, set to 0 */
> >> > +	} __packed;
> >> > +	u64 data;
> >> > +};
> >> > +
> >> > +/*
> >> > + * Default MAC.
> >> > + * This MAC address will be read from EFI persistent variable if
> >> > configured.
> >> > + * It can also be reconfigured with standard Linux tools.
> >> > + */
> >> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> >> > 0x01};
> >> > +
> >>
> >> Is a predefined MAC address better than a random one here?
> >>
> >> For DT based systems, we tend to also call of_get_mac_address()
> >> in order to allow setting a unique address from firmware.
> >
> > A predefined default MAC address is simpler in this case, which makes
> > DHCP or PXE boot easier in development environment.
> >
> > For production, the MAC address is stored in persistent UEFI variable
> > on the eeprom, which is read in function tmfifo_get_cfg_mac() which
> > calls efi.get_variable() to get the MAC address.
> 
> Ok, fair enough. Generally speaking the recommended way of doing
> this is to update the DT properties from eeprom when a network
> driver has no way to store the mac address itself, but I suppose
> you always have UEFI anyway, and this also makes it work in
> the same way across both DT and ACPI.

Yes, we always have UEFI available.

> 
> >> > +/* Interrupt handler. */
> >> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> >> > +{
> >> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> >> > +	struct tmfifo *fifo = dev_id - i;
> >> > +
> >> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> >> > +		schedule_work(&fifo->work);
> >> > +
> >> > +	return IRQ_HANDLED;
> >> > +}
> >>
> >> Maybe using a request_threaded_irq() would be a better way to defer
> >> the handler into IRQ context.
> >
> > Not sure if I understand this comment correctly... In this case, the
> > implemented handler
> > has some mutex_lock() used, which tries to make the logic simple since
> > multiple services
> > (network & console) are sharing the same fifo. Thus schedule_work() is
> > used.
> 
> schedule_work() and threaded IRQs are just two different ways of deferring
> into process context where you can do the mutex_lock(). The effect is
> almost the same, but work queues can be delayed for a substantial
> amount of time depending on what other work functions have been
> queued at the same time, and request_threaded_irq() is the more normal
> way of doing this specifically for an IRQ handler, probably saving a couple
> of lines of source code.
> 
> If you have any kind of real-time requirement, you can also assign a
> specific realtime priority to that interrupt thread.

Good information! Currently this FIFO is mainly for mgmt purpose. I'll try the threaded 
IRQs approach to see whether it can be easily converted and make it into the v5 patch.
If not easily, probably a separate commit to improve it later?

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-29 14:17           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks. Please see my response inline.

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Friday, October 26, 2018 2:35 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> Soc
> 
> On 10/26/18, Liming Sun <lsun@mellanox.com> wrote:
> >> -----Original Message-----
> >> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> >> Behalf Of Arnd Bergmann
> >> Sent: Thursday, October 25, 2018 11:58 AM
> >> To: Liming Sun <lsun@mellanox.com>
> >> Cc: Olof Johansson <olof@lixom.net>; David Woods
> >> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>;
> arm-
> >> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> >> kernel at lists.infradead.org
> >> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> >> Soc
> >>
> >> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> >> > +struct tmfifo_vdev {
> >> > +	struct virtio_device vdev;	/* virtual device */
> >> > +	u8 status;
> >> > +	u64 features;
> >> > +	union {				/* virtio config space */
> >> > +		struct virtio_console_config cons;
> >> > +		struct virtio_net_config net;
> >> > +	} config;
> >> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> >> > +	u8 *tx_buf;			/* tx buffer */
> >> > +	u32 tx_head;			/* tx buffer head */
> >> > +	u32 tx_tail;			/* tx buffer tail */
> >> > +};
> >>
> >> I suppose you did this to keep the driver simple, but it seems a
> >> little inflexible
> >> to only support two specific device types. Wouldn't we also want e.g.
> >> 9pfs
> >> or virtio_blk in some configurations?
> >
> > We could definitely add more when needed, which should be
> straightforward
> > due to the virtio framework. For now only network and console are
> supported
> > and have been verified.
> 
> Wouldn't that require a new PCI ID to have the driver on the host
> side match what this side does? I guess I'll see when you post the
> other driver.

Yes, the PCI ID is in the host side driver which will be included in patch v5.

> 
> >> > +/* TMFIFO device structure */
> >> > +struct tmfifo {
> >> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> >> > +	struct platform_device *pdev;	/* platform device */
> >> > +	struct mutex lock;
> >> > +	void __iomem *rx_base;		/* mapped register base */
> >> > +	void __iomem *tx_base;		/* mapped register base */
> >> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> >> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> >> > +	unsigned long pend_events;	/* pending bits for deferred process
> >> */
> >> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
> >> > +	struct work_struct work;	/* work struct for deferred process
> >> */
> >> > +	struct timer_list timer;	/* keepalive timer */
> >> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> >> > +};
> >> > +
> >> > +union tmfifo_msg_hdr {
> >> > +	struct {
> >> > +		u8 type;		/* message type */
> >> > +		__be16 len;		/* payload length */
> >> > +		u8 unused[5];		/* reserved, set to 0 */
> >> > +	} __packed;
> >> > +	u64 data;
> >> > +};
> >> > +
> >> > +/*
> >> > + * Default MAC.
> >> > + * This MAC address will be read from EFI persistent variable if
> >> > configured.
> >> > + * It can also be reconfigured with standard Linux tools.
> >> > + */
> >> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> >> > 0x01};
> >> > +
> >>
> >> Is a predefined MAC address better than a random one here?
> >>
> >> For DT based systems, we tend to also call of_get_mac_address()
> >> in order to allow setting a unique address from firmware.
> >
> > A predefined default MAC address is simpler in this case, which makes
> > DHCP or PXE boot easier in development environment.
> >
> > For production, the MAC address is stored in persistent UEFI variable
> > on the eeprom, which is read in function tmfifo_get_cfg_mac() which
> > calls efi.get_variable() to get the MAC address.
> 
> Ok, fair enough. Generally speaking the recommended way of doing
> this is to update the DT properties from eeprom when a network
> driver has no way to store the mac address itself, but I suppose
> you always have UEFI anyway, and this also makes it work in
> the same way across both DT and ACPI.

Yes, we always have UEFI available.

> 
> >> > +/* Interrupt handler. */
> >> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> >> > +{
> >> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> >> > +	struct tmfifo *fifo = dev_id - i;
> >> > +
> >> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> >> > +		schedule_work(&fifo->work);
> >> > +
> >> > +	return IRQ_HANDLED;
> >> > +}
> >>
> >> Maybe using a request_threaded_irq() would be a better way to defer
> >> the handler into IRQ context.
> >
> > Not sure if I understand this comment correctly... In this case, the
> > implemented handler
> > has some mutex_lock() used, which tries to make the logic simple since
> > multiple services
> > (network & console) are sharing the same fifo. Thus schedule_work() is
> > used.
> 
> schedule_work() and threaded IRQs are just two different ways of deferring
> into process context where you can do the mutex_lock(). The effect is
> almost the same, but work queues can be delayed for a substantial
> amount of time depending on what other work functions have been
> queued at the same time, and request_threaded_irq() is the more normal
> way of doing this specifically for an IRQ handler, probably saving a couple
> of lines of source code.
> 
> If you have any kind of real-time requirement, you can also assign a
> specific realtime priority to that interrupt thread.

Good information! Currently this FIFO is mainly for mgmt purpose. I'll try the threaded 
IRQs approach to see whether it can be easily converted and make it into the v5 patch.
If not easily, probably a separate commit to improve it later?

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-29 14:17           ` Liming Sun
@ 2018-10-29 14:52             ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-29 14:52 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Mon, Oct 29, 2018 at 3:17 PM Liming Sun <lsun@mellanox.com> wrote:

> > >> > +/* Interrupt handler. */
> > >> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > >> > +{
> > >> > +        int i = (uintptr_t)dev_id % sizeof(void *);
> > >> > +        struct tmfifo *fifo = dev_id - i;
> > >> > +
> > >> > +        if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > >> > +                schedule_work(&fifo->work);
> > >> > +
> > >> > +        return IRQ_HANDLED;
> > >> > +}
> > >>
> > >> Maybe using a request_threaded_irq() would be a better way to defer
> > >> the handler into IRQ context.
> > >
> > > Not sure if I understand this comment correctly... In this case, the
> > > implemented handler
> > > has some mutex_lock() used, which tries to make the logic simple since
> > > multiple services
> > > (network & console) are sharing the same fifo. Thus schedule_work() is
> > > used.
> >
> > schedule_work() and threaded IRQs are just two different ways of deferring
> > into process context where you can do the mutex_lock(). The effect is
> > almost the same, but work queues can be delayed for a substantial
> > amount of time depending on what other work functions have been
> > queued at the same time, and request_threaded_irq() is the more normal
> > way of doing this specifically for an IRQ handler, probably saving a couple
> > of lines of source code.
> >
> > If you have any kind of real-time requirement, you can also assign a
> > specific realtime priority to that interrupt thread.
>
> Good information! Currently this FIFO is mainly for mgmt purpose. I'll try the threaded
> IRQs approach to see whether it can be easily converted and make it into the v5 patch.
> If not easily, probably a separate commit to improve it later?

Sure, no problem. This is not an important change, but I also think it should
be easy to do, in particular as it is meant to simplify the code.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-29 14:52             ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-29 14:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 29, 2018 at 3:17 PM Liming Sun <lsun@mellanox.com> wrote:

> > >> > +/* Interrupt handler. */
> > >> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > >> > +{
> > >> > +        int i = (uintptr_t)dev_id % sizeof(void *);
> > >> > +        struct tmfifo *fifo = dev_id - i;
> > >> > +
> > >> > +        if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > >> > +                schedule_work(&fifo->work);
> > >> > +
> > >> > +        return IRQ_HANDLED;
> > >> > +}
> > >>
> > >> Maybe using a request_threaded_irq() would be a better way to defer
> > >> the handler into IRQ context.
> > >
> > > Not sure if I understand this comment correctly... In this case, the
> > > implemented handler
> > > has some mutex_lock() used, which tries to make the logic simple since
> > > multiple services
> > > (network & console) are sharing the same fifo. Thus schedule_work() is
> > > used.
> >
> > schedule_work() and threaded IRQs are just two different ways of deferring
> > into process context where you can do the mutex_lock(). The effect is
> > almost the same, but work queues can be delayed for a substantial
> > amount of time depending on what other work functions have been
> > queued at the same time, and request_threaded_irq() is the more normal
> > way of doing this specifically for an IRQ handler, probably saving a couple
> > of lines of source code.
> >
> > If you have any kind of real-time requirement, you can also assign a
> > specific realtime priority to that interrupt thread.
>
> Good information! Currently this FIFO is mainly for mgmt purpose. I'll try the threaded
> IRQs approach to see whether it can be easily converted and make it into the v5 patch.
> If not easily, probably a separate commit to improve it later?

Sure, no problem. This is not an important change, but I also think it should
be easy to do, in particular as it is meant to simplify the code.

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-26 19:32         ` Arnd Bergmann
@ 2018-10-29 14:58           ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 14:58 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

Thanks! Please see response inline.

- Liming

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd@arndb.de]
> Sent: Friday, October 26, 2018 3:32 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On Fri, Oct 26, 2018 at 9:18 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > > Behalf Of Arnd Bergmann
> > > Sent: Thursday, October 25, 2018 11:38 AM
> > > To: Liming Sun <lsun@mellanox.com>
> > > Cc: Olof Johansson <olof@lixom.net>; David Woods
> > > <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>;
> arm-
> > > soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-
> > > kernel@lists.infradead.org
> > > Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config
> option
> > >
> > > On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > > > This commit introduces config option for Mellanox BlueField SoC,
> > > > which can be used to build the SoC specific drivers, and enables
> > > > it by default in configs/defconfig.
> > > >
> > > > Reviewed-by: David Woods <dwoods@mellanox.com>
> > > > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > > > ---
> > > >  arch/arm64/Kconfig.platforms | 6 ++++++
> > > >  arch/arm64/configs/defconfig | 1 +
> > > >  2 files changed, 7 insertions(+)
> > >
> > > Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> > >
> > > I'm sorry for missing your series in the past. We should definitely merge
> > > the platform support soon. Do you also have device tree files for
> reference
> > > systems or even production hardware?
> >
> > We have obsoleted the device tree, and mainly support ACPI now
> > on Reference/Production HW. Below is the TMFIFO definition in the ACPI
> > DSDT table.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > The full ACPI implementation can be found in the 1.0 release:
> > http://www.mellanox.com/downloads/BlueField/BlueField-
> 1.0.0.10521/BlueField-1.0.0.10521.tar.xz
> > Inside this tarball, we can see the "src/edk2/" directory which has the edk2
> patch file including all the ACPI implementation.
> 
> It would be nice if you could still include the dts sources in the submission.
> Since this is not a classic server hardware, DT is probably more suitable
> here, and there are likely use cases that won't work with ACPI, so it's
> good to have a starting point for your users if they need to override
> the ACPI tables with a DT, or build systems with a simpler boot loader.

Sure, I'll keep this DT change for reference. A little explanation: the
Mellanox BlueField SoC could come in PCIe NIC or standalone device (server) form,
which can do PXE boot, CentOS installation even on the NIC, etc. The same
software/driver works on both.  DT does not seem to work well for some
requirements/features like NVDIMM (which requires ACPI DSM).  That's why we
switched to ACPI, though DT support is preserved.

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-29 14:58           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 14:58 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks! Please see response inline.

- Liming

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd at arndb.de]
> Sent: Friday, October 26, 2018 3:32 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On Fri, Oct 26, 2018 at 9:18 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > > Behalf Of Arnd Bergmann
> > > Sent: Thursday, October 25, 2018 11:38 AM
> > > To: Liming Sun <lsun@mellanox.com>
> > > Cc: Olof Johansson <olof@lixom.net>; David Woods
> > > <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>;
> arm-
> > > soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> > > kernel at lists.infradead.org
> > > Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config
> option
> > >
> > > On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > > > This commit introduces config option for Mellanox BlueField SoC,
> > > > which can be used to build the SoC specific drivers, and enables
> > > > it by default in configs/defconfig.
> > > >
> > > > Reviewed-by: David Woods <dwoods@mellanox.com>
> > > > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > > > ---
> > > >  arch/arm64/Kconfig.platforms | 6 ++++++
> > > >  arch/arm64/configs/defconfig | 1 +
> > > >  2 files changed, 7 insertions(+)
> > >
> > > Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> > >
> > > I'm sorry for missing your series in the past. We should definitely merge
> > > the platform support soon. Do you also have device tree files for
> reference
> > > systems or even production hardware?
> >
> > We have obsoleted the device tree, and mainly support ACPI now
> > on Reference/Production HW. Below is the TMFIFO definition in the ACPI
> > DSDT table.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > The full ACPI implementation can be found in the 1.0 release:
> > http://www.mellanox.com/downloads/BlueField/BlueField-
> 1.0.0.10521/BlueField-1.0.0.10521.tar.xz
> > Inside this tarball, we can see the "src/edk2/" directory which has the edk2
> patch file including all the ACPI implementation.
> 
> It would be nice if you could still include the dts sources in the submission.
> Since this is not a classic server hardware, DT is probably more suitable
> here, and there are likely use cases that won't work with ACPI, so it's
> good to have a starting point for your users if they need to override
> the ACPI tables with a DT, or build systems with a simpler boot loader.

Sure, I'll keep this DT change for reference. A little explanation: the
Mellanox BlueField SoC could come in PCIe NIC or standalone device (server) form,
which can do PXE boot, CentOS installation even on the NIC, etc. The same
software/driver works on both.  DT does not seem to work well for some
requirements/features like NVDIMM (which requires ACPI DSM).  That's why we
switched to ACPI, though DT support is preserved.

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-29 14:58           ` Liming Sun
@ 2018-10-29 15:26             ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-29 15:26 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Mon, Oct 29, 2018 at 3:58 PM Liming Sun <lsun@mellanox.com> wrote:
> > It would be nice if you could still include the dts sources in the submission.
> > Since this is not a classic server hardware, DT is probably more suitable
> > here, and there are likely use cases that won't work with ACPI, so it's
> > good to have a starting point for your users if they need to override
> > the ACPI tables with a DT, or build systems with a simpler boot loader.
>
> Sure, I'll keep this DT change for reference. A little explanation that
> Mellanox BlueField SOC could be PCIe NIC or standalone device (server)  form which
> can do PXE boot, CentOS installation even on the NIC, etc. The same software/driver
> works on both.

Ok, good.

>  DT seems not working well for some requirement/features like
> NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI though
> DT support is preserved.

This is probably something we need to look at separately, I'm sure we will want
to use NVDIMM with DT based systems in the future. Can you explain what
the DSM is used for here? Is this something that could be done in software
in the NVDIMM hardware specific driver?

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-29 15:26             ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2018-10-29 15:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 29, 2018 at 3:58 PM Liming Sun <lsun@mellanox.com> wrote:
> > It would be nice if you could still include the dts sources in the submission.
> > Since this is not a classic server hardware, DT is probably more suitable
> > here, and there are likely use cases that won't work with ACPI, so it's
> > good to have a starting point for your users if they need to override
> > the ACPI tables with a DT, or build systems with a simpler boot loader.
>
> Sure, I'll keep this DT change for reference. A little explanation that
> Mellanox BlueField SOC could be PCIe NIC or standalone device (server)  form which
> can do PXE boot, CentOS installation even on the NIC, etc. The same software/driver
> works on both.

Ok, good.

>  DT seems not working well for some requirement/features like
> NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI though
> DT support is preserved.

This is probably something we need to look at separately, I'm sure we will want
to use NVDIMM with DT based systems in the future. Can you explain what
the DSM is used for here? Is this something that could be done in software
in the NVDIMM hardware specific driver?

       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-29 15:26             ` Arnd Bergmann
@ 2018-10-29 16:09               ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 16:09 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

Please see my response below.

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd@arndb.de]
> Sent: Monday, October 29, 2018 11:26 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On Mon, Oct 29, 2018 at 3:58 PM Liming Sun <lsun@mellanox.com> wrote:
> > > It would be nice if you could still include the dts sources in the submission.
> > > Since this is not a classic server hardware, DT is probably more suitable
> > > here, and there are likely use cases that won't work with ACPI, so it's
> > > good to have a starting point for your users if they need to override
> > > the ACPI tables with a DT, or build systems with a simpler boot loader.
> >
> > Sure, I'll keep this DT change for reference. A little explanation that
> > Mellanox BlueField SOC could be PCIe NIC or standalone device (server)
> form which
> > can do PXE boot, CentOS installation even on the NIC, etc. The same
> software/driver
> > works on both.
> 
> Ok, good.
> 
> >  DT seems not working well for some requirement/features like
> > NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI
> though
> > DT support is preserved.
> 
> This is probably something we need to look at separately, I'm sure we will
> want
> to use NVDIMM with DT based systems in the future. Can you explain what
> the DSM is used for here? Is this something that could be done in software
> in the NVDIMM hardware specific driver?

The DSM is a kind of running logic, rather than configuration, which is provided by 
system firmware. It's defined in the UEFI spec and supported by Linux. In the
NVDIMM case here, it's used to do Address Range Scrub (ARS), I2C access (such as
reading the DIMM SPD information), runtime health checks, etc. 

DSM is a kind of requirement of NVDIMM; see for example:
https://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
https://docs.microsoft.com/en-us/windows-hardware/drivers/storage/-dsm-interface-for-byte-addressable-energy-backed-function-class--function-interface-1-

By following the ACPI DSM definitions, there is no extra drivers needed in 
Linux due to the existing ACPI framework.  Since it's 'standard' APIs in UEFI, 
it has potential to support other OS other than Linux.

Hardware specific driver might be able to do some of the functionality, or
even 'simulate' some of the ACPI implementation or APIs. I guess it might need 
a framework for it which I haven't thought a lot of details yet :)

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-29 16:09               ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 16:09 UTC (permalink / raw)
  To: linux-arm-kernel

Please see my response below.

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd at arndb.de]
> Sent: Monday, October 29, 2018 11:26 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On Mon, Oct 29, 2018 at 3:58 PM Liming Sun <lsun@mellanox.com> wrote:
> > > It would be nice if you could still include the dts sources in the submission.
> > > Since this is not a classic server hardware, DT is probably more suitable
> > > here, and there are likely use cases that won't work with ACPI, so it's
> > > good to have a starting point for your users if they need to override
> > > the ACPI tables with a DT, or build systems with a simpler boot loader.
> >
> > Sure, I'll keep this DT change for reference. A little explanation that
> > Mellanox BlueField SOC could be PCIe NIC or standalone device (server)
> form which
> > can do PXE boot, CentOS installation even on the NIC, etc. The same
> software/driver
> > works on both.
> 
> Ok, good.
> 
> >  DT seems not working well for some requirement/features like
> > NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI
> though
> > DT support is preserved.
> 
> This is probably something we need to look at separately, I'm sure we will
> want
> to use NVDIMM with DT based systems in the future. Can you explain what
> the DSM is used for here? Is this something that could be done in software
> in the NVDIMM hardware specific driver?

The DSM is a kind of running logic, rather than configuration, which is provided by 
system firmware. It's defined in the UEFI spec and supported by Linux. In the
NVDIMM case here, it's used to do Address Range Scrub (ARS), I2C access (such as
reading the DIMM SPD information), runtime health checks, etc. 

DSM is a kind of requirement of NVDIMM; see for example:
https://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
https://docs.microsoft.com/en-us/windows-hardware/drivers/storage/-dsm-interface-for-byte-addressable-energy-backed-function-class--function-interface-1-

By following the ACPI DSM definitions, there is no extra drivers needed in 
Linux due to the existing ACPI framework.  Since it's 'standard' APIs in UEFI, 
it has potential to support other OS other than Linux.

Hardware specific driver might be able to do some of the functionality, or
even 'simulate' some of the ACPI implementation or APIs. I guess it might need 
a framework for it which I haven't thought a lot of details yet :)

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-26 20:33         ` Arnd Bergmann
@ 2018-10-29 16:48           ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 16:48 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

Thanks for the comments! Please see my response/questions inline.

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd@arndb.de]
> Sent: Friday, October 26, 2018 4:34 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox
> BlueField SoC
> 
> On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > > @@ -0,0 +1,23 @@
> > > > +* Mellanox BlueField SoC TmFifo
> > > > +
> > > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > > +external host machine, which can be accessed by external host via
> > > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > > +to implement virtual console and network interface based on the virtio
> > > > +framework.
> > > > +
> > > > +Required properties:
> > > > +
> > > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > > +- reg:             Physical base address and length of Rx/Tx block
> > > > +- interrupts:      The interrupt number of Rx low water mark, Rx high
> water
> > > > mark
> > > > +           Tx low water mark, Tx high water mark respectively.
> > >
> > >
> > > This sounds like it might fit into the mailbox subsystem, and perhaps
> > > it should use the mailbox DT bindings. Have you had a look at that?
> >
> > This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> > Like the response to patch 2/4, ACPI is actually used now instead of device
> tree.
> > The TMFIFO definition in the ACPI DSDT table would be something like
> below.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > Any suggestion how it should be added into Linux Documentation, or maybe
> I
> > should just remove this commit from this patch series?
> 
> Maybe the best way here would be to not use ACPI for the case
> where bluefin is integrated into a PCIe endpoint, since ACPI is
> not as flexible here and generally relies on having an SBSA
> compliant hardware that you no longer have if you require
> random platform devices for booting from and for your console.
> 
> For the case where a bluefin SoC is used in a standalone system,
> having ACPI makes more sense, as that lets you install Red Hat
> Linux or other operating systems that rely on SBBR and SBSA.

A little explanation for this SoC:
In the PCIe case, it's not just an endpoint. It can work in a PCIe "multi-host" mode
which behaves just like standalone system, such as doing PXE boot and CentOS 
or other OS installation on the eMMC and boot from it. It can run fully isolated 
from the x86 host. Below is a link of brief introduction.
http://www.mellanox.com/related-docs/prod_adapter_cards/PB_BlueField_Smart_NIC.pdf

So for now the same SW with ACPI configuration is used on all the boards for simplicity. 
But I think DT could definitely be used on customized board or when needed.

> 
> > As for the sub-component of this driver, the "soc" might be better fit than
> the mailbox
> > for some reasons. It's a communication between extern machines and the
> SoC via
> > USB / PCIe,  like pushing boot stream, console and network mgmt. Some of
> the features,
> > like pushing boot stream, doesn't communicate with the ARM core. The
> boot stream
> > is pushed to the SoC HW logic directly. I'll add the host-side virtio-based
> driver in patch v5.
> 
> Right, the drivers/mailbox subsystem was not the right idea here,
> I noticed that myself after actually reading the driver. Drivers/soc
> may also not be the best fit, since this is not really about it being
> a SoC, but rather a way to encapsulate virtual devices. The
> mic driver I mentioned is in drivers/misc, but I don't like to add stuff
> there if we can avoid it.
> 
> drivers/virtio, drivers/bus or drivers/mfd might also be an option that
> could fit better than drivers/soc, or you could have your own subdir
> below drivers/ as some others do. Finally, drivers/platform/mellanox
> might be a reasonable choice, and it would let you keep both sides
> of the driver in one place.

We actually have more drivers coming for this SoC, such for I2C, GPIO, PKA, 
performance counter, L3 cache profile, etc, which could be found at the link below
https://git.launchpad.net/~dcwoods/ubuntu/+source/linux/+git/cosmic/log/?h=mellanox_bluefield

Since it's not just the FIFO driver, any suggestion as to which one would be better, 
"soc" or still "platform"? Thanks!

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-10-29 16:48           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-29 16:48 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks for the comments! Please see my response/questions inline.

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd at arndb.de]
> Sent: Friday, October 26, 2018 4:34 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox
> BlueField SoC
> 
> On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > > @@ -0,0 +1,23 @@
> > > > +* Mellanox BlueField SoC TmFifo
> > > > +
> > > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > > +external host machine, which can be accessed by external host via
> > > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > > +to implement virtual console and network interface based on the virtio
> > > > +framework.
> > > > +
> > > > +Required properties:
> > > > +
> > > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > > +- reg:             Physical base address and length of Rx/Tx block
> > > > +- interrupts:      The interrupt number of Rx low water mark, Rx high
> water
> > > > mark
> > > > +           Tx low water mark, Tx high water mark respectively.
> > >
> > >
> > > This sounds like it might fit into the mailbox subsystem, and perhaps
> > > it should use the mailbox DT bindings. Have you had a look at that?
> >
> > This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> > Like the response to patch 2/4, ACPI is actually used now instead of device
> tree.
> > The TMFIFO definition in the ACPI DSDT table would be something like
> below.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > Any suggestion how it should be added into Linux Documentation, or maybe
> I
> > should just remove this commit from this patch series?
> 
> Maybe the best way here would be to not use ACPI for the case
> where bluefin is integrated into a PCIe endpoint, since ACPI is
> not as flexible here and generally relies on having an SBSA
> compliant hardware that you no longer have if you require
> random platform devices for booting from and for your console.
> 
> For the case where a bluefin SoC is used in a standalone system,
> having ACPI makes more sense, as that lets you install Red Hat
> Linux or other operating systems that rely on SBBR and SBSA.

A little explanation for this SoC:
In the PCIe case, it's not just an endpoint. It can work in a PCIe "multi-host" mode
which behaves just like standalone system, such as doing PXE boot and CentOS 
or other OS installation on the eMMC and boot from it. It can run fully isolated 
from the x86 host. Below is a link of brief introduction.
http://www.mellanox.com/related-docs/prod_adapter_cards/PB_BlueField_Smart_NIC.pdf

So for now the same SW with ACPI configuration is used on all the boards for simplicity. 
But I think DT could definitely be used on customized board or when needed.

> 
> > As for the sub-component of this driver, the "soc" might be better fit than
> the mailbox
> > for some reasons. It's a communication between extern machines and the
> SoC via
> > USB / PCIe,  like pushing boot stream, console and network mgmt. Some of
> the features,
> > like pushing boot stream, doesn't communicate with the ARM core. The
> boot stream
> > is pushed to the SoC HW logic directly. I'll add the host-side virtio-based
> driver in patch v5.
> 
> Right, the drivers/mailbox subsystem was not the right idea here,
> I noticed that myself after actually reading the driver. Drivers/soc
> may also not be the best fit, since this is not really about it being
> a SoC, but rather a way to encapsulate virtual devices. The
> mic driver I mentioned is in drivers/misc, but I don't like to add stuff
> there if we can avoid it.
> 
> drivers/virtio, drivers/bus or drivers/mfd might also be an option that
> could fit better than drivers/soc, or you could have your own subdir
> below drivers/ as some others do. Finally, drivers/platform/mellanox
> might be a reasonable choice, and it would let you keep both sides
> of the driver in one place.

We actually have more drivers coming for this SoC, such for I2C, GPIO, PKA, 
performance counter, L3 cache profile, etc, which could be found at the link below
https://git.launchpad.net/~dcwoods/ubuntu/+source/linux/+git/cosmic/log/?h=mellanox_bluefield

Since it's not just the FIFO driver, any suggestion as to which one would be better, 
"soc" or still "platform"? Thanks!

> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v5 1/5] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y or m here to enable TmFifo support. The TmFifo driver provides
+	  the virtio driver framework for the TMFIFO of Mellanox BlueField SoC
+	  and the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size (number of descriptors per virtqueue). */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size (bytes); must be a multiple of 8 (FIFO word). */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval; 0644 so it can be tuned at runtime. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock; serializes device teardown in tmfifo_remove(). */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size; 0444 (read-only) since it is fixed at vring allocation. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO (indexed by virtio device id). */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Structure to maintain the ring state.
+ *
+ * Rx/Tx over the FIFO is word-by-word and can be suspended mid-packet, so
+ * desc/desc_head/cur_len/rem_len snapshot the in-flight descriptor chain
+ * between calls to tmfifo_virtio_rxtx().
+ */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types; also the bit index into tmfifo->pend_events. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx); index into tmfifo_vdev->vrings[]. */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+/* Per-virtio-device state (one for console, one for network). */
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;			/* virtio status byte */
+	u64 features;			/* negotiated feature bits */
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	/* Circular tx buffer (console only; NULL for the net device). */
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	/* Serializes vdev create/delete and the Rx/Tx work handler. */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	/* Protects the console tx buffer and vring_interrupt() delivery. */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+/*
+ * 8-byte message header exchanged over the FIFO before each packet.
+ * 'type' is a VIRTIO_ID_* value and 'len' is the payload length in
+ * network (big-endian) byte order (see the htons/ntohs call sites).
+ */
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;			/* raw 64-bit FIFO word */
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/*
+ * Return the available Tx buffer space.
+ *
+ * 8 bytes (one FIFO word) are held back so the tail can never catch up
+ * with the head; head == tail therefore always means "empty".
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Advance the tail (producer) pointer after pushing 'len' bytes, wrapping
+ * around the circular console Tx buffer.
+ */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Advance the head (consumer) pointer after popping 'len' bytes, wrapping
+ * around the circular console Tx buffer.
+ */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Allocate DMA-coherent vring buffers for both rings (Rx/Tx) of a vdev.
+ *
+ * On failure, any buffers already allocated by this call are released,
+ * so the caller only needs to free tm_vdev itself.
+ *
+ * Return: 0 on success, -ENOMEM if a coherent buffer cannot be allocated.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			/* Unwind the buffers allocated so far. */
+			while (--i >= 0) {
+				vring = &tm_vdev->vrings[i];
+				size = PAGE_ALIGN(vring_size(vring->size,
+							     vring->align));
+				dma_free_coherent(tm_vdev->vdev.dev.parent,
+						  size, vring->va, vring->dma);
+				vring->va = NULL;
+			}
+			/* Allocation failure is -ENOMEM, not -EINVAL. */
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free the vring buffers (and any virtqueues created on top of them)
+ * for the given vdev slot.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			/*
+			 * vq deletion nested under the va check is fine:
+			 * find_vqs() only creates a vq on an allocated va.
+			 */
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/*
+ * Free interrupts of the fifo device.
+ * The dev_id cookie must match the one used at request time:
+ * (u8 *)fifo + irq-index (see tmfifo_probe / tmfifo_irq_handler).
+ */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Each IRQ was registered with dev_id = (u8 *)fifo + irq-index, so the
+ * low bits of the cookie recover the index and subtracting them recovers
+ * the tmfifo pointer. This relies on the kzalloc'ed struct tmfifo being
+ * at least pointer-aligned and on TM_IRQ_CNT <= sizeof(void *).
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Defer the actual FIFO servicing to the workqueue. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Device release callback required by the driver core; the tmfifo_vdev
+ * memory is freed explicitly in tmfifo_delete_vdev(), so nothing to do.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the head descriptor of the next available packet from the vring,
+ * consuming one entry of the avail ring. Returns NULL when the driver
+ * side has posted nothing new. No internal locking; callers serialize
+ * access (work handler under fifo->lock, console path under spin_lock).
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a completed descriptor chain to the used ring, recording 'len'
+ * bytes as written, and publish it by bumping used->idx.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/*
+ * Get the total length of a descriptor chain by walking the
+ * VRING_DESC_F_NEXT links and summing each descriptor's length.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release one packet back to the used ring.
+ *
+ * If the caller passes the in-flight descriptor (*desc != NULL with a
+ * saved desc_head), that packet and its recorded pkt_len are released;
+ * otherwise the next available chain is pulled and released unused.
+ * *desc is cleared so the caller's in-flight state is reset.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * House-keeping timer: periodically kicks the work handler for both
+ * directions and re-arms itself every tmfifo_timer_interval jiffies.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output.
+ *
+ * Drains all available console Tx descriptors into the circular tx_buf:
+ * each packet is prefixed with an 8-byte tmfifo_msg_hdr and padded to an
+ * 8-byte boundary so the FIFO writer can emit whole 64-bit words.
+ * Packets that don't fit are dropped (released unconsumed).
+ *
+ * Caller must hold fifo->spin_lock (see tmfifo_virtio_notify()).
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Stage the message header (len is big-endian on the wire). */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy each descriptor of the chain, wrapping if needed. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Moves data between the virtqueue and the hardware FIFO one 64-bit word
+ * at a time, bounded by the FIFO occupancy, and may therefore suspend
+ * mid-packet: the in-flight position is kept in vring->desc/cur_len/
+ * rem_len and fifo->vring[is_rx] until the next invocation. On Rx the
+ * message header demultiplexes the packet to the console or network
+ * vring. Runs from the work handler under fifo->lock (the console Tx
+ * drain additionally takes fifo->spin_lock).
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	/* Resume from any packet left in flight by the previous call. */
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/*
+				 * NOTE(review): writeq() already performs a
+				 * CPU-to-LE conversion on most platforms, so
+				 * the explicit cpu_to_le64() looks like a
+				 * double swap on big-endian — confirm the
+				 * intended FIFO byte order.
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				/*
+				 * NOTE(review): same endian concern as above;
+				 * readq() + le64_to_cpu() may double-swap on
+				 * big-endian.
+				 */
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				/* Tx: emit the message header word first. */
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ * Rx rings just flag the worker; console Tx is drained synchronously
+ * into the local buffer because the caller may have interrupts disabled.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ * Consumes the pending-event bits set by the irq handler / timer and
+ * services every vdev's Tx then Rx ring, serialized by fifo->lock.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	/* Bail out during probe/remove windows. */
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* virtio_config_ops.get_features: report this device's feature bits. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* virtio_config_ops.finalize_features: record the negotiated bits. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs(); any packet still in flight is
+ * returned to the used ring first. The DMA vring buffers themselves are
+ * released separately by tmfifo_free_vrings().
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues on top of the DMA buffers
+ * allocated in tmfifo_alloc_vrings(). vq->priv links each virtqueue back
+ * to its tmfifo_vring so the notify callback can find the fifo state.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* virtio_config_ops.get_status: read the cached status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* virtio_config_ops.set_status: cache the status byte (no hardware op). */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* virtio_config_ops.reset: just clear the cached status for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * virtio_config_ops.get: copy bytes out of the cached config space.
+ * The 'offset + len < len' term catches unsigned wrap-around.
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * virtio_config_ops.set: copy bytes into the cached config space.
+ * The 'offset + len < len' term catches unsigned wrap-around.
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was "virtio_get" — report the correct accessor. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations exposed by every tmfifo virtual device. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create and register one virtio device sharing the TM FIFO.
+ *
+ * @fifo: the FIFO the device attaches to.
+ * @vdev_id: VIRTIO_ID_CONSOLE or VIRTIO_ID_NET (also the vdev[] slot).
+ * @features: virtio feature bits advertised to the core.
+ * @config: optional config-space image copied into the device (may be NULL).
+ * @size: size in bytes of @config.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* The console device buffers its output locally. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		/* The allocation was previously used unchecked. */
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+	kfree(tm_vdev->tx_buf);		/* also fixes a tx_buf leak here */
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Unregister and free one virtio device of the fifo; a no-op if the
+ * slot is already empty. Always returns 0.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function; also reused as the unwind path of
+ * tmfifo_probe(), hence all the NULL checks. Tears down the timer,
+ * irqs, deferred work, vdevs and mappings, then releases the Rx/Tx
+ * memory regions.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the EFI persistent
+ * variable "RshimMacAddr" (UTF-16 name, global-variable GUID). 'mac'
+ * is only overwritten when a full 6-byte value is found, so the caller
+ * keeps its default otherwise.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO: claim and map the Rx/Tx register windows, hook up
+ * the four watermark interrupts, program the FIFO watermarks, create
+ * the console and network virtio devices and start the house-keeping
+ * timer. On any failure the partially-initialized state is unwound via
+ * tmfifo_remove().
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs tmfifo_timer as the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		/* platform_get_irq() returns a negative errno on failure. */
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			goto err;
+		fifo->irq[i] = ret;
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/*
+	 * 'ret' is 0 after the irq loop; reset it so the ioremap failures
+	 * below report -ENOMEM instead of returning success.
+	 */
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module entry point: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+/* Module exit point: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 1/5] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-10-31 18:09   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/*
+ * Return the number of bytes still free in the console Tx buffer.
+ * Eight bytes are always kept in reserve so head/tail never collide.
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	u32 used;
+
+	if (vdev->tx_tail >= vdev->tx_head)
+		used = vdev->tx_tail - vdev->tx_head;
+	else
+		used = TMFIFO_CONS_TX_BUF_SIZE -
+			(vdev->tx_head - vdev->tx_tail);
+
+	return TMFIFO_CONS_TX_BUF_SIZE - 8 - used;
+}
+
+/* Advance the Tx buffer tail (with wrap-around) after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	u32 tail = vdev->tx_tail + len;
+
+	if (tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		tail -= TMFIFO_CONS_TX_BUF_SIZE;
+	vdev->tx_tail = tail;
+}
+
+/* Advance the Tx buffer head (with wrap-around) after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	u32 head = vdev->tx_head + len;
+
+	if (head >= TMFIFO_CONS_TX_BUF_SIZE)
+		head -= TMFIFO_CONS_TX_BUF_SIZE;
+	vdev->tx_head = head;
+}
+
+/*
+ * Allocate the DMA-coherent vring memory for both rings (Rx & Tx) of
+ * one virtual device.
+ *
+ * Returns 0 on success or -ENOMEM on allocation failure.  On failure,
+ * rings allocated by earlier iterations are released here so the
+ * caller only needs to free the vdev structure itself.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			/*
+			 * Unwind rings allocated so far (the caller's error
+			 * path does not call tmfifo_free_vrings() here), and
+			 * report the failure as -ENOMEM, not -EINVAL.
+			 */
+			while (--i >= 0) {
+				vring = &tm_vdev->vrings[i];
+				size = PAGE_ALIGN(vring_size(vring->size,
+							     vring->align));
+				dma_free_coherent(tm_vdev->vdev.dev.parent,
+						  size, vring->va, vring->dma);
+				vring->va = NULL;
+			}
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/*
+		 * The virtqueue is only ever created over an allocated ring
+		 * (see tmfifo_virtio_find_vqs()), so it is deleted inside
+		 * the va check.  Both pointers are cleared to make repeated
+		 * calls safe.
+		 */
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			/* Clear first so the handler stops scheduling work. */
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			/*
+			 * The dev_id cookie is the fifo pointer offset by the
+			 * irq index; it must match what was passed at request
+			 * time (see tmfifo_irq_handler() for the decoding).
+			 */
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * The dev_id cookie encodes both the fifo and the interrupt type: it is
+ * the fifo pointer plus the TM_*_IRQ index (0..TM_IRQ_CNT-1).  Since the
+ * fifo structure is at least pointer-aligned, taking the address modulo
+ * sizeof(void *) recovers the index and subtracting it recovers the fifo.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Defer the actual Rx/Tx work; only schedule once per pending bit. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Release callback for the virtio device.  All resources are freed
+ * explicitly by tmfifo_delete_vdev(), so there is nothing to do here;
+ * an empty callback still avoids the driver-core warning for devices
+ * without one.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when
+ * the driver side has posted nothing new.  The driver keeps its own
+ * next_avail cursor instead of the ring's used index because packets
+ * are consumed incrementally, word by word, across multiple calls.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a completed descriptor chain to the used ring.
+ *
+ * @vdev: virtio device (for endian conversion helpers).
+ * @vr:   the ring the descriptor belongs to.
+ * @desc: head descriptor of the finished chain.
+ * @len:  number of bytes written/consumed for this chain.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/*
+ * Get the total length of a descriptor chain by walking the NEXT links
+ * from @desc until the last descriptor of the chain.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the current (possibly partially processed) packet back to the
+ * used ring.
+ *
+ * If *desc points at an in-flight chain, the saved head and length from
+ * the vring state are used; otherwise the next available chain (if any)
+ * is fetched and released unprocessed.  *desc is cleared and the saved
+ * packet length reset in either case.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	/* Self re-arm: the timer runs for the lifetime of the device. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output.
+ *
+ * Drains the console Tx virtqueue into the circular tx_buf: each packet
+ * is prefixed with a tmfifo message header, padded to 8 bytes, and its
+ * descriptors released immediately.  Caller holds fifo->spin_lock (see
+ * tmfifo_virtio_notify()); the buffered data is later pushed to the
+ * hardware FIFO by tmfifo_virtio_rxtx().
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Header carries the demux type and payload length. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			/* Copy in one or two pieces, wrapping at buffer end. */
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * This is a resumable state machine: a packet is moved 8 bytes at a time
+ * between the hardware FIFO and the current descriptor chain, and the
+ * progress (current desc, cur_len, rem_len, pkt_len) is saved in the
+ * tmfifo_vring so processing can continue on the next invocation.  On Rx
+ * the message header demultiplexes the packet to the network or console
+ * vring; fifo->vring[is_rx] records which vring currently owns the FIFO
+ * direction.  Called from the work handler with fifo->lock held.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	/* Resume from the descriptor saved by the previous call, if any. */
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				/* The spinlock serializes against notify(). */
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			/* Buffer the output under the spinlock, drain later. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ *
+ * Runs the deferred Rx/Tx processing for every virtual device sharing
+ * the FIFO, under fifo->lock so only one pass runs at a time.  The
+ * pending-event bits are set by the irq handler, the notify callback
+ * and the house-keeping timer.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits advertised for this virtual device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->features;
+}
+
+/* Record the feature bits the virtio core settled on. Always succeeds. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		/* Clear the pointer before deleting the virtqueue. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested name on top of the DMA memory
+ * pre-allocated by tmfifo_alloc_vrings().  On any failure all queues
+ * created so far are torn down and a negative errno is returned.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Back-link so notify() can find the vring from the vq. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the device status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->status;
+}
+
+/* Write the device status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = status;
+}
+
+/* Reset the device: only the status byte is cleared for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Bounds check; "offset + len < len" catches unsigned overflow. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Bounds check; "offset + len < len" catches unsigned overflow. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Fix copy-paste: this is the set path, not virtio_get. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * @fifo:     the owning tmfifo device.
+ * @vdev_id:  VIRTIO_ID_NET or VIRTIO_ID_CONSOLE.
+ * @features: virtio feature bits to advertise.
+ * @config:   optional initial config-space content (copied if non-NULL).
+ * @size:     size of @config in bytes.
+ *
+ * Returns 0 on success, -EEXIST if the vdev already exists, or -ENOMEM
+ * on allocation/registration failure.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+
+	/* Publish before the error paths so tmfifo_free_vrings() works. */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		/*
+		 * Check the allocation: an unchecked tx_buf would be
+		 * dereferenced later by the console output path.
+		 */
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+	/* kfree(NULL) is a no-op; fixes tx_buf leak on registration failure. */
+	kfree(tm_vdev->tx_buf);
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option
  2018-10-31 18:09   ` Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac3..aeb67c2 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f9a186f..508cb9d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option
@ 2018-10-31 18:09   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac3..aeb67c2 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f9a186f..508cb9d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-31 18:09   ` Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt numbers of Rx low water mark, Rx high water mark,
+		Tx low water mark and Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-10-31 18:09   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt numbers of Rx low water mark, Rx high water mark,
+		Tx low water mark and Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-10-31 18:09   ` Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c78feb0..07f7c7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2018-10-31 18:09   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c78feb0..07f7c7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,14 @@ L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v5 5/5] soc: mellanox: Add host side drivers to support Mellanox BlueField SoCs.
  2018-05-25 16:06 ` Liming Sun
                   ` (20 preceding siblings ...)
  (?)
@ 2018-10-31 18:09 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: Liming Sun, devicetree, linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC using an
interface called the Rshim.  The rshim driver provides console,
networking and boot services over this interface.  There are three
possible transports for connecting a host to the rshim and there is
a back-end driver for each of them.

  rshim_usb - connection via a USB port.

  rshim_pcie - connections via PCI express, this is used for boards
               in a PCIe form-factor.

  rshim_pcie_lf - connection via PCI express when the device is
                  in "livefish" mode where FW is not loaded yet.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig              |    8 +
 drivers/soc/mellanox/Makefile             |    1 +
 drivers/soc/mellanox/host/Makefile        |    2 +
 drivers/soc/mellanox/host/rshim.c         | 2673 +++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h         |  361 ++++
 drivers/soc/mellanox/host/rshim_net.c     |  834 +++++++++
 drivers/soc/mellanox/host/rshim_pcie.c    |  478 ++++++
 drivers/soc/mellanox/host/rshim_pcie_lf.c |  695 ++++++++
 drivers/soc/mellanox/host/rshim_regs.h    |  163 ++
 drivers/soc/mellanox/host/rshim_usb.c     | 1035 +++++++++++
 10 files changed, 6250 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellanox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..79a1c86
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	/* Nothing to read while the boot file is open. */
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			/* Query how many words are pending in the FIFO. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * Data in the FIFO is little endian; convert it to CPU
+		 * byte order after reading.  The sender is expected to
+		 * have encoded it as little endian, which is usually the
+		 * default case.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			/*
+			 * NOTE(review): this 8-byte store assumes 'buf' is
+			 * suitably aligned -- confirm for all callers.
+			 */
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ *
+ * Returns the number of bytes consumed from 'buf' (never counting the
+ * padding), or a negative error code.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		/* Silently drop TmFifo traffic while booting. */
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		/*
+		 * Bail out if the FIFO status could not be read; the word
+		 * must not be pushed into the FIFO in that case.  (Fixes
+		 * the previous behavior of writing regardless of a failed
+		 * status read above.)
+		 */
+		if (retval < 0)
+			break;
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+/*
+ * Queue a write to the RShim backend.
+ *
+ * Caller holds bd->mutex (the BOOT path below drops and re-acquires it
+ * around the completion wait).  NET/CONSOLE writes are handed to the
+ * worker and return 0 immediately; BOOT writes block until the worker
+ * signals boot_write_complete, then return the actual length written.
+ */
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		/* Silently drop TmFifo traffic while booting. */
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	/* Only backends with re-probe need this wait. */
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;	/* mutex stays dropped */
+			mutex_lock(&bd->mutex);
+		}
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Write handler for the boot device file: streams the boot image into
+ * the BOOT FIFO.  Only multiples of 8 bytes are sent; a trailing partial
+ * word is clipped, and -EINVAL is returned after the aligned portion has
+ * been transferred.
+ */
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		/*
+		 * Alternate between the two boot buffers so a fresh
+		 * copy_from_user() doesn't clobber data a previous write
+		 * may still be using (NOTE(review): confirm the in-flight
+		 * assumption against the backend worker).
+		 */
+		char *buf = bd->boot_buf[whichbuf];
+
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		/* Stop on a short write or an error. */
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* Release handler for the boot device file. */
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	/* Kick the worker one second from now to resume normal operation. */
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Drop our device/module references under the global lock
+	 * (NOTE(review): the matching get is outside this chunk).
+	 */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations for the boot device file (installed by rshim_boot_open). */
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+/*
+ * Open handler for the boot device file.
+ *
+ * Flushes TmFifo state, puts the chip into external boot mode, and
+ * issues a SW reset so the boot stream can be written via
+ * rshim_boot_write().  Returns 0 on success or a negative error code.
+ */
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	/* Bitmask of other devices whose mutexes we hold. */
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have seems to have some sort of bug
+	     * which makes it return more bytes than we actually wrote!  In
+	     * that case we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+		bd->is_boot_open = 0;
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	while (devs_locked) {
+		/*
+		 * Use __builtin_ctzl(): devs_locked is unsigned long, and
+		 * the 32-bit __builtin_ctz() previously used here would
+		 * ignore bits >= 32 on 64-bit kernels and could unlock the
+		 * wrong device's mutex (the cleanup loop above already
+		 * used the long variant).
+		 */
+		int i = __builtin_ctzl(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Record a FIFO error and wake every sleeper so it can observe it:
+ * writers waiting on write completion, plus all readers/writers parked
+ * on any per-channel FIFO wait queue.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int chan;
+
+	bd->tmfifo_error = err;
+
+	/* Writers blocked on the outstanding-write completion. */
+	wake_up_interruptible_all(&bd->write_completed);
+
+	/* Sleepers on each channel's read/write FIFO. */
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		wake_up_interruptible_all(&bd->read_fifo[chan].operable);
+		wake_up_interruptible_all(&bd->write_fifo[chan].operable);
+	}
+}
+
+/*
+ * Drain the read buffer, and start another read/interrupt if needed.
+ * Called with bd->spinlock held (see rshim_fifo_read()).  Demultiplexes
+ * the shared read buffer into the per-channel read FIFOs based on the
+ * TmFifo message header.
+ */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	/* Nothing to drain while the boot file owns the chip. */
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			/* Payloads are padded to 8 bytes on the wire. */
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		/* Skip the inter-packet padding once a packet completes. */
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with RCU lock. See comments in
+			 * rshim_register_service() / rshim_deregister_service()
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+/*
+ * Read up to 'count' bytes from the per-channel read FIFO into 'buffer'.
+ *
+ * Blocks (interruptibly) for more data unless 'nonblock' is set.
+ * 'to_user' selects copy_to_user() vs. memcpy() for the destination.
+ * Returns the number of bytes read, or a negative error code if nothing
+ * was transferred.  Takes bd->mutex; bd->spinlock is taken around FIFO
+ * index updates.
+ */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				/* Try draining the hardware once first. */
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				return to_user ? -EINTR : -ERESTARTSYS;
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		/* pass1/pass2 split handles ring-buffer wrap-around. */
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+/*
+ * Fill the shared write buffer from the per-channel write FIFOs and
+ * push it to the backend.  Console data gets a TmFifo message header
+ * prepended here; network packets already carry one.  Callers in this
+ * file invoke it with bd->spinlock held (see rshim_fifo_write()).
+ */
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				/*
+				 * The net header lives in the FIFO itself;
+				 * point at it directly when contiguous, else
+				 * reassemble the wrapped header into msg_hdr.
+				 */
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			/* pass1/pass2 split handles ring wrap-around. */
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+/*
+ * Allocate the per-channel read/write FIFO buffers.  Idempotent:
+ * buffers that already exist are kept.  Returns 0 on success, nonzero
+ * if any allocation failed.
+ */
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		/* Pointer truth test instead of comparing against 0. */
+		allocfail |= !bd->read_fifo[i].data;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= !bd->write_fifo[i].data;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+/*
+ * Reset all TmFifo state: shared read/write buffer indices, the channel
+ * round-robin cursors, and each per-channel FIFO.  The spin flags and
+ * channel resets are done under bd->spinlock; the buffer fields are
+ * cleared beforehand.
+ */
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int i;
+
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+	bd->rx_chan = bd->tx_chan = 0;
+
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_WRITING |
+			    RSH_SFLG_READING);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+/*
+ * Free the per-channel FIFO buffers, reset all TmFifo state, and mark
+ * the TmFifo as unusable.
+ */
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int chan;
+
+	/* Release both FIFO buffers for every channel; kfree(NULL) is OK. */
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		kfree(bd->read_fifo[chan].data);
+		bd->read_fifo[chan].data = NULL;
+		kfree(bd->write_fifo[chan].data);
+		bd->write_fifo[chan].data = NULL;
+	}
+
+	/* Return the FIFO bookkeeping to its initial state. */
+	rshim_fifo_reset(bd);
+
+	/* The TmFifo can no longer be used. */
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+/*
+ * Write up to 'count' bytes from 'buffer' into the per-channel write
+ * FIFO, kicking rshim_fifo_output() as space is filled.
+ *
+ * Blocks (interruptibly) for FIFO space unless 'nonblock' is set.
+ * 'from_user' selects copy_from_user() vs. memcpy() for the source.
+ * Returns bytes written, or a negative error code if nothing was
+ * transferred.  Takes bd->mutex; bd->spinlock is taken around FIFO
+ * index updates.
+ */
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		/* pass1/pass2 split handles ring-buffer wrap-around. */
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+/*
+ * Flush a channel: wait until its write FIFO drains and no write is
+ * outstanding.  'start', 'end' and 'datasync' are ignored -- the whole
+ * channel is always flushed.
+ */
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * Poll handler for a TmFifo channel: reports readable/writable state,
+ * plus POLLERR on a FIFO error (except for the console -- see below).
+ */
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+
+	if (!read_empty(bd, chan))
+		retval |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		retval |= POLLOUT | POLLWRNORM;
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		retval |= POLLERR;
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, retval);
+
+	return retval;
+}
+
+
+/*
+ * Common release path for the console and network FIFO devices:
+ * drops the per-channel open state, quiesces console input when the
+ * last console user goes away, and releases the module/kref
+ * references taken in rshim_open().
+ */
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	/* With neither device open, stop the net stream and reads. */
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* Drop the references taken in rshim_open(). */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	/* Network-channel read; delegates to the common FIFO path. */
+	return rshim_fifo_read(file->private_data, user_buffer, count,
+			       TMFIFO_NET_CHAN, file->f_flags & O_NONBLOCK,
+			       true);
+}
+
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	/* Network-channel write; delegates to the common FIFO path. */
+	return rshim_fifo_write(file->private_data, user_buffer, count,
+				TMFIFO_NET_CHAN, file->f_flags & O_NONBLOCK,
+				true);
+}
+
+/* fsync on the network device: flush the net channel of the FIFO. */
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_NET_CHAN);
+}
+
+/* poll on the network device: common FIFO poll on the net channel. */
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+}
+
+/* release on the network device: common FIFO release, net channel. */
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_NET_CHAN);
+}
+
+/* Installed by rshim_tmfifo_open(); no open/llseek of their own. */
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int rc = 0;
+
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_tm_open) {
+		/* Only one open of the network device is allowed. */
+		pr_debug("tmfifo_open: file already open\n");
+		rc = -EBUSY;
+	} else {
+		bd->is_tm_open = 1;
+
+		/* Call the drainer to do an initial read, if needed. */
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return rc;
+}
+
+/* Console file operations routines */
+
+/*
+ * Deferred-work handler: issues the keepalive write, completes any
+ * pending boot-stream write, finishes a deferred FIFO write, and
+ * pushes/pulls console data.  Re-arms the housekeeping timer while
+ * the console is open on backends without reprobe support.
+ */
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	/* Refresh the keepalive magic requested by rshim_timer_func(). */
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	/* Complete a boot write that was handed off to the worker. */
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	/* While the boot device is open, skip FIFO/console traffic. */
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		/*
+		 * NOTE(review): plain spin_lock() here while every other
+		 * taker of bd->spinlock in this file uses spin_lock_irq();
+		 * confirm this path cannot race with an irq-context taker.
+		 */
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	/* Without reprobe, poll the console via the timer (~100ms). */
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	/* Console-channel read; delegates to the common FIFO path. */
+	return rshim_fifo_read(file->private_data, user_buffer, count,
+			       TMFIFO_CONS_CHAN, file->f_flags & O_NONBLOCK,
+			       true);
+}
+
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	/* Console-channel write; delegates to the common FIFO path. */
+	return rshim_fifo_write(file->private_data, user_buffer, count,
+				TMFIFO_CONS_CHAN, file->f_flags & O_NONBLOCK,
+				true);
+}
+
+/* fsync on the console device: flush the console channel. */
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_CONS_CHAN);
+}
+
+/*
+ * Console ioctl: supports only TCGETS and TCSETS/TCSETSW/TCSETSF
+ * against the backend's shadow termios (bd->cons_termios); every
+ * other command returns -EINVAL.  When the kernel provides TCGETS2,
+ * the legacy TCGETS/TCSETS* use the "_1" (old struct termios)
+ * conversion helpers.
+ */
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+/* poll on the console device: common FIFO poll, console channel. */
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+}
+
+/* release on the console device: common FIFO release, console channel. */
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_CONS_CHAN);
+}
+
+/* Installed by rshim_console_open(); adds termios ioctl support. */
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * The console is already open; nothing to do beyond
+		 * bumping the reference count.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	/* First console opener: mark the stream live. */
+	bd->is_cons_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Kick the worker so console traffic starts flowing. */
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * Called on attach once the target is out of boot: clears any stale
+ * FIFO error, completes the booting_complete (and, with
+ * RSH_RESET_MUTEX, reset_complete) waiters, and restarts the console
+ * worker if the console is already open.  Always returns 0.
+ */
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		}
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	u64 value;
+	int rc;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* The file offset encodes channel (bits 16..19) and address. */
+	mutex_lock(&bd->mutex);
+	rc = bd->read_rshim(bd,
+			    (*ppos >> 16) & 0xF, /* channel # */
+			    *ppos & 0xFFFF,	 /* addr */
+			    &value);
+	mutex_unlock(&bd->mutex);
+
+	if (rc)
+		return rc;
+
+	/* The read was successful; copy the data to userspace. */
+	if (copy_to_user(user_buffer, &value, count))
+		return -EFAULT;
+
+	return count;
+}
+
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	u64 value;
+	int rc;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Copy the register value in from userspace. */
+	if (copy_from_user(&value, user_buffer, count))
+		return -EFAULT;
+
+	/* The file offset encodes channel (bits 16..19) and address. */
+	mutex_lock(&bd->mutex);
+	rc = bd->write_rshim(bd,
+			     (*ppos >> 16) & 0xF, /* channel # */
+			     *ppos & 0xFFFF, /* addr */
+			     value);
+	mutex_unlock(&bd->mutex);
+
+	return rc ? rc : count;
+}
+
+/* release on the register device: drop refs taken in rshim_open(). */
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* Installed by rshim_rshim_open(); seekable raw register access. */
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+/* open on the register device: just swap in the register fops. */
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+/* seq_file show: emit BOOT_MODE, SW_RESET and DRV_NAME lines. */
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	u64 value;
+	int rc;
+
+	/* Current boot mode, from the BOOT_CONTROL register. */
+	rc = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL, &value);
+	if (rc) {
+		pr_err("couldn't read rshim register\n");
+		return rc;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+/*
+ * Parse "KEY HEXVALUE" commands written to the misc device.
+ * "BOOT_MODE <n>" updates the BOOT_CONTROL register; "SW_RESET 1"
+ * resets the chip (detaching/re-attaching the TmFifo on backends
+ * without reprobe support).  Returns count on success.
+ */
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	/* Whole-command writes at offset 0 only; keep room for '\0'. */
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+
+	/*
+	 * Userspace data is not necessarily NUL-terminated; terminate it
+	 * before parsing so sscanf() cannot run past the copied bytes.
+	 */
+	buf[count] = '\0';
+
+	/* Bound the key to sizeof(key) - 1 to prevent overflow. */
+	if (sscanf(buf, "%31s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+/* release on the misc device: tear down the seq_file, drop refs. */
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	/* Drop the references taken in rshim_open(). */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* Installed by rshim_misc_open(); reads go through seq_file. */
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+/* open on the misc device: turn the file into a single-shot seq_file. */
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	/* bd is re-attached as the seq_file's private pointer. */
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+/*
+ * Top-level open for every rshim char device.  Minor numbers are
+ * grouped RSH_DEV_TYPES per backend: the quotient selects the
+ * backend, the remainder the sub-device.  Takes a module reference
+ * and a kref on the backend, then dispatches to the per-type open
+ * which installs the real file_operations.
+ */
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+/* Shared cdev fops; rshim_open() swaps in the per-type fops. */
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+/*
+ * Push zero-length VIRTIO_ID_NET header words into the host-to-tile
+ * FIFO (up to TMFIFO_MAX_SYNC_WORDS, bounded by the current free
+ * space) so both sides re-synchronize message framing after an
+ * attach without reprobe.  Returns 0 on success, negative on error.
+ */
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	hdr.data = 0;	/* don't write uninitialized header bits */
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		/*
+		 * Write the sync words through the FIFO data register;
+		 * the previous code mistakenly targeted the status
+		 * register it had just read the count from.
+		 */
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_DATA, hdr.data);
+		if (retval < 0)
+			return retval;
+	}
+
+	return 0;
+}
+
+/*
+ * Event fan-out for a backend.  FIFO events run the corresponding
+ * drainer/error handler; ATTACH completes boot, syncs the tmfifo
+ * when needed, and creates every registered service; DETACH deletes
+ * every service.  May be called with bd->spinlock held for the FIFO
+ * events, so service lookup uses RCU rather than a mutex.  Returns
+ * 0, or the first service-create error on ATTACH.
+ */
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		/* Attach every registered service to this backend. */
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+/*
+ * Pick a slot for a device name: an exact previous-name match wins,
+ * then a never-used slot, then any currently-unused slot.  Returns
+ * the slot index, or -1 if every slot is taken.
+ */
+static int rshim_find_index(char *dev_name)
+{
+	int i;
+
+	/* First look for a match with a previous device name. */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_dev_names[i] &&
+		    !strcmp(dev_name, rshim_dev_names[i])) {
+			pr_debug("found match with previous at index %d\n", i);
+			return i;
+		}
+	}
+
+	/* Then look for a never-used slot. */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (!rshim_dev_names[i]) {
+			pr_debug("found never-used slot %d\n", i);
+			return i;
+		}
+	}
+
+	/* Finally look for a currently-unused slot. */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (!rshim_devs[i]) {
+			pr_debug("found unused slot %d\n", i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/* Look up the backend registered under dev_name, or NULL. */
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int index = rshim_find_index(dev_name);
+
+	if (index >= 0)
+		return rshim_devs[index];
+
+	/* If none of that worked, we fail. */
+	pr_err("couldn't find slot for new device %s\n", dev_name);
+	return NULL;
+}
+EXPORT_SYMBOL(rshim_find);
+
+/* House-keeping timer. */
+/*
+ * House-keeping timer: reschedules the worker while console work is
+ * pending, and periodically (every rshim_keepalive_period ms) asks
+ * the worker to refresh the keepalive scratchpad.  Re-arms itself.
+ */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+/* sysfs "rshim_path" attribute: the backend's device-name string. */
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	if (!bd)
+		return -ENODEV;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			rshim_dev_names[bd->dev_index]);
+}
+
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+/* Deferred work: pull in the rshim network service module. */
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/* Check whether backend is allowed to register or not. */
+/* Check whether backend is allowed to register or not. */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	int retries, rc;
+	u64 value;
+
+	/* Clear RSH_SCRATCHPAD1 so a live peer will rewrite its magic. */
+	rc = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (rc < 0)
+		return -ENODEV;
+
+	/*
+	 * Poll RSH_SCRATCHPAD1 up to one second to check whether it's reset to
+	 * the keepalive magic value, which indicates another backend driver has
+	 * already attached to this target.
+	 */
+	for (retries = 0; retries < 10; retries++) {
+		rc = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				    &value);
+		if (rc < 0)
+			return -ENODEV;
+
+		if (value == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+/*
+ * Register a backend: claim a device slot, verify no other backend
+ * owns the target, initialize all per-backend state, create the
+ * per-type char/class devices, and start the housekeeping timer.
+ * Returns 0 on success or a negative error code.
+ */
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	/* Honor the backend_driver restriction, if one is configured. */
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	/* Refuse to attach when another backend already owns the target. */
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	bd->dev_index = dev_index;
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+			/*
+			 * No class device was created; skip the drvdata
+			 * and sysfs setup rather than dereferencing the
+			 * ERR_PTR value.
+			 */
+			continue;
+		}
+
+		pr_debug("added class dev %s\n",
+			 format_dev_t(devbuf, rshim_dev_base +
+				      bd->dev_index *
+				      RSH_DEV_TYPES + i));
+
+		dev_set_drvdata(cl_dev, bd);
+		if (device_create_file(cl_dev, &dev_attr_rshim_path))
+			pr_err("could not create rshim_path file in sysfs\n");
+	}
+
+	/*
+	 * Allocate the boot buffers as a pair: either both exist or
+	 * both are NULL, so a check of boot_buf[0] is sufficient and
+	 * neither buffer can be leaked half-initialized.
+	 */
+	bd->boot_buf[0] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+	bd->boot_buf[1] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+	if (!bd->boot_buf[0] || !bd->boot_buf[1]) {
+		kfree(bd->boot_buf[0]);
+		kfree(bd->boot_buf[1]);
+		bd->boot_buf[0] = NULL;
+		bd->boot_buf[1] = NULL;
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+
+/* Undo rshim_register(): stop the timer, free buffers, drop devices. */
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the housekeeping/keepalive timer. */
+	del_timer_sync(&bd->timer);
+
+	kfree(bd->boot_buf[0]);
+	kfree(bd->boot_buf[1]);
+
+	/* Remove the char devices and their class devices. */
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class,
+			       rshim_dev_base + bd->dev_index *
+			       RSH_DEV_TYPES + i);
+	}
+
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+/*
+ * Register a service (e.g. the network backend): publish a private
+ * copy of it via the RCU-protected rshim_svc[] table and attach it
+ * to every existing backend.  Returns 0, -EEXIST if a service of
+ * the same type is already registered, or a create() error.
+ */
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add memory barrier to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_unlock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+/*
+ * Unregister a service: detach it from every backend, unpublish the
+ * RCU pointer, then wait for all readers and outstanding svc->ref
+ * holders (see rshim_notify() DETACH) before freeing the copy.
+ */
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+/*
+ * Module init: create the "rsh" device class, reserve the char-device
+ * region (rshim_nr_devs backends x RSH_DEV_TYPES minors), allocate
+ * the per-slot name/device tables, and create the workqueue.  All
+ * partially-acquired resources are released on failure.
+ */
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	/*
+	 * Use kcalloc for both tables: zero-initialized and checked
+	 * against multiplication overflow of rshim_nr_devs.
+	 */
+	rshim_dev_names = kcalloc(rshim_nr_devs,
+				  sizeof(rshim_dev_names[0]), GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+/*
+ * Module exit: flush the pending module-load work first, then release
+ * the device-number region, class and workqueue, and finally free the
+ * service copies, per-slot names, and the tables themselves.
+ */
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types. */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX
+};
+
+/* TMFIFO message header (8 bytes on the wire). */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length, big-endian */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;			/* whole header as one 64-bit word */
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/* FIFO structure. */
+struct rshim_fifo {
+	unsigned char *data;		/* backing buffer */
+	unsigned int head;		/* head index (presumably write side - confirm in rshim_fifo_write) */
+	unsigned int tail;		/* tail index (presumably read side - confirm in rshim_fifo_read) */
+	wait_queue_head_t operable;	/* waiters for the FIFO becoming usable */
+};
+
+/* RShim backend, embedded in each transport driver's device state. */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Net service state (a struct rshim_net *), if created. */
+	void *net;
+
+	/* House-keeping timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 * NOTE(review): the "two interfaces" wording looks inherited from
+	 * the USB backend - confirm for other transports.
+	 */
+	struct kref kref;
+
+	/* State flags. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs, one per demux channel. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs, one per demux channel. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+#ifdef RSH_RESET_MUTEX
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/* RShim service (e.g. networking) plugged into the rshim core. */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/* Reference count. */
+	atomic_t ref;
+
+	/* Create service. */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global array to store RShim devices and names. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/* Flag to drain the current pending packet. */
+static bool rshim_net_draining_mode;
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the state of one vring. */
+struct rshim_net_vring {
+	void *va;			/* ring memory (allocated in rshim_net_alloc_vrings) */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* TMFIFO header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types. */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;		/* serializes FIFO rx/tx & link state */
+	struct rshim_backend *bd;		/* backend */
+	u8 status;			/* virtio status byte */
+	u16 virtio_registered : 1;	/* register_virtio_device() done */
+	u64 features;			/* negotiated virtio feature bits */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/*
+ * Allocate the ring memory for each of the net device's vrings.
+ * Returns 0 on success or -ENOMEM on allocation failure; rings
+ * allocated so far are reclaimed later by rshim_net_free_vrings().
+ */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			/* Out of memory, not an invalid argument. */
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the net device. */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/*
+		 * The virtqueue was built on top of vring->va (see
+		 * vring_new_virtqueue() in find_vqs), so delete it
+		 * BEFORE freeing the ring memory it points into.
+		 */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		kfree(vring->va);
+		vring->va = NULL;
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+	struct virtqueue *vq;
+	int event;
+
+	/*
+	 * Service Tx first, then Rx.  The event bit values happen to match
+	 * the vring indices (RSH_NET_TX_EVENT == RSH_NET_VRING_TX and
+	 * likewise for Rx), so the event id doubles as the ring index.
+	 */
+	for (event = RSH_NET_TX_EVENT; event >= RSH_NET_RX_EVENT; event--) {
+		if (!test_and_clear_bit(event, &net->pend_events) ||
+		    !net->virtio_registered)
+			continue;
+		vq = net->vrings[event].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, event == RSH_NET_RX_EVENT);
+	}
+
+	/* Declare the link down if no Rx activity within the keepalive time. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/*
+ * struct device release callback for the virtio device.  The enclosing
+ * rshim_net structure is freed explicitly in rshim_net_delete_dev(), so
+ * there is nothing to do here.
+ */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the head descriptor of the next posted packet from the vring, or
+ * NULL if the driver has not posted any new buffers.
+ */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	/* next_avail caught up with avail->idx: nothing new posted. */
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	/*
+	 * NOTE(review): a corrupt avail ring panics the kernel here;
+	 * consider a graceful error path instead of BUG_ON.
+	 */
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Sum up the buffer lengths of a whole descriptor chain. */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 total = 0;
+
+	/* Walk the chain until a descriptor without the NEXT flag. */
+	while (desc) {
+		total += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)];
+	}
+
+	return total;
+}
+
+/* House-keeping timer; re-arms itself every rshim_net_timer_interval. */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+/*
+ * Give the current descriptor chain back to the driver via the used ring
+ * and deliver the completion "interrupt" to the virtqueue.
+ * NOTE(review): the rxtx path calls this under net->lock, but the
+ * del_vqs path does not - confirm whether that is safe.
+ */
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	/* Publish the chain head and total length in the used ring. */
+	idx = vr->used->idx % vring->size;
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/* Update the link state based on observed activity. */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	bool link_up = !!(net->config.status & VIRTIO_NET_S_LINK_UP);
+	int i;
+
+	if (activity && !link_up) {
+		/* Bring up the link. */
+		net->config.status |= VIRTIO_NET_S_LINK_UP;
+		virtio_config_changed(&net->vdev);
+	} else if (!activity && link_up) {
+		/* Bring down the link. */
+		net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+		virtio_config_changed(&net->vdev);
+
+		/* Reset the per-ring packet state. */
+		for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+			net->vrings[i].pkt_len =
+					sizeof(struct virtio_net_hdr);
+			net->vrings[i].cur_len = 0;
+			net->vrings[i].rem_len = 0;
+		}
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Moves bytes between the vring descriptor chains and the TMFIFO while
+ * holding net->lock.  The loop may stop in the middle of a packet when
+ * the FIFO would block; vring->desc records where to resume next time.
+ */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			/* rem_len first counts down the TMFIFO header. */
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header. */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			/*
+			 * The virtio_net_hdr is produced/consumed locally
+			 * and never crosses the FIFO, so start past it.
+			 */
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+
+	/*
+	 * Virtio-net maintains vrings in pairs: ring id 0 (even) is the
+	 * Rx ring (RSH_NET_VRING_RX) and ring id 1 (odd) is the Tx ring
+	 * (RSH_NET_VRING_TX), so an even id means an Rx kick.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX bit. */
+		if (!test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	} else {
+		/* Set the TX bit. */
+		if (!test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	}
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Report the feature bits cached in our device state. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	return container_of(vdev, struct rshim_net, vdev)->features;
+}
+
+/* Record the features the core finalized; reported back by get_features. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	container_of(vdev, struct rshim_net, vdev)->features = vdev->features;
+
+	return 0;
+}
+
+/* Tear down the virtqueues created by find_vqs(). */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Give back any packet that is still in flight. */
+		if (vring->desc)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		/* Detach the virtqueue pointer, then delete it. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues on top of the ring memory
+ * that rshim_net_alloc_vrings() allocated earlier.
+ */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* Reset the pre-allocated ring memory for (re)use. */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	/* Undo any virtqueues already created in this call. */
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the cached virtio status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct rshim_net, vdev)->status;
+}
+
+/* Cache the virtio status byte written by the core. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = status;
+}
+
+/* Reset the device: just clear the cached status byte for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = 0;
+}
+
+/* Copy 'len' bytes of the config space at 'offset' into 'buf'. */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	/* Reject ranges that overflow the config space or wrap around. */
+	if (offset + len <= sizeof(net->config) && offset + len >= len)
+		memcpy(buf, (u8 *)&net->config + offset, len);
+	else
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+}
+
+/* Write the value of a configuration field. */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		/* Fix copy/paste in the message: this is the set path. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/* Virtio config operations (callbacks invoked by the virtio core). */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/* Tear down one rshim_net device and free its state. */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (!net)
+		return 0;
+
+	/* Stop the house-keeping timer and any queued deferred work. */
+	del_timer_sync(&net->timer);
+	cancel_work_sync(&net->work);
+
+	/* Unregister the virtio device, then release its rings. */
+	if (net->virtio_registered)
+		unregister_virtio_device(&net->vdev);
+	rshim_net_free_vrings(net);
+
+	kfree(net);
+
+	return 0;
+}
+
+/* Backend callback: Rx data is ready - kick the worker. */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (!net)
+		return;
+
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+	schedule_work(&net->work);
+}
+
+/* Service callback: delete the net device attached to this backend. */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	int ret;
+
+	if (!bd->net)
+		return 0;
+
+	ret = rshim_net_delete_dev((struct rshim_net *)bd->net);
+	bd->net = NULL;
+
+	return ret;
+}
+
+/* Service callback: create the virtio-net device for this backend. */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(struct rshim_net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	/*
+	 * timer_setup() already installs the callback; the old explicit
+	 * net->timer.function assignment was redundant and removed.
+	 */
+	timer_setup(&net->timer, rshim_net_timer, 0);
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+	if (rshim_net_alloc_vrings(net))
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+/*
+ * Networking service descriptor registered with the rshim core.  Only
+ * used within this file, so make it static and keep it out of the
+ * kernel-wide namespace (the rshim core has its own 'rshim_svc' array).
+ */
+static struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+/* Module init: register the networking service with the rshim core. */
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+/* Module unload: drain in-flight traffic, then deregister the service. */
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	/*
+	 * rshim_deregister_service() returns void; 'return <void expr>;'
+	 * in a void function is an ISO C constraint violation, so call it
+	 * as a plain statement.
+	 */
+	rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR2 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Per-device state for the RShim PCIe backend (one per probed function). */
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	/* Owning PCI device; set in probe, cleared via drvdata on delete. */
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		/* Allow the poll loop to be interrupted by a signal. */
+		if (signal_pending(current))
+			return -EINTR;
+
+		/*
+		 * NOTE(review): unbounded busy-wait with no cpu_relax() or
+		 * timeout; if the hardware never clears PENDING this spins
+		 * forever -- consider bounding it.
+		 */
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read/write methods for 32-bit systems
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/*
+	 * Read the first 32-bit word from RSH_BYTE_ACC_RDAT.  It is placed
+	 * in the upper half of read_result; the be64_to_cpu() below fixes
+	 * the byte ordering (presumably the widget returns big-endian data
+	 * -- TODO confirm against hardware docs).
+	 */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read the second 32-bit word from RSH_BYTE_ACC_RDAT. */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/* 8-byte RShim write via two 4-byte Byte Access Widget writes. */
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/*
+	 * Write control bits to RSH_BYTE_ACC_CTL again.
+	 * NOTE(review): this repeats the identical write above; presumably
+	 * required by the widget's access sequence -- confirm, or drop one.
+	 */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write upper 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write lower 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* A read drains posted writes, so reset the write throttle counter. */
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	/* 32-bit host: two 4-byte accesses via the Byte Access Widget. */
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	/* 64-bit host: single 8-byte MMIO read. */
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+/* 8-byte RShim write with write-streaming throttling. */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		/* The read also resets write_count back to 0. */
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	/* 32-bit host: two 4-byte accesses via the Byte Access Widget. */
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	/* 64-bit host: single 8-byte MMIO write. */
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+/* kref release callback: tear down the backend and free the device. */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+	int i;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+
+	/*
+	 * Fix: clear the instances[] slot before freeing the device.
+	 * The original left a dangling pointer there, so the slot was
+	 * never reusable and a later probe could see freed memory.
+	 */
+	for (i = 0; i < MAX_DEV_COUNT; i++) {
+		if (instances[i] == dev) {
+			instances[i] = NULL;
+			break;
+		}
+	}
+	kfree(dev);
+}
+
+/*
+ * Probe routine.  Allocates (or reuses) the backend state, maps the
+ * RShim BAR and registers the backend with the rshim core.
+ * Fixes vs. original: rshim_fifo_alloc() was called twice, and the
+ * allocfail / rshim_register / rshim_notify failure paths returned 0.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	/* Find a free slot in the per-driver instance table. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL) {
+		err = -ENOMEM;
+		goto error;
+	}
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend; reuse an existing one if already created. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFOs (the original called this twice). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->read_buf;
+
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->write_buf;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		/* Fix: report the failure instead of returning 0. */
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			/* Fix: propagate the error instead of returning 0. */
+			err = retval;
+			rshim_unlock();
+			goto rshim_map_failed;
+		} else
+			/* Name ownership passed to the backend; don't free. */
+			pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		/* Fix: propagate the error instead of returning 0. */
+		err = retval;
+		goto rshim_map_failed;
+	}
+
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	rshim_lock();
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	/* drvdata is NULL if probe never completed for this function. */
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 * NOTE(review): the code raises SWINT3, presumably handled on the
+	 * SoC side to perform that reset -- confirm against firmware.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	/* Detach, drain the work queue and free all per-device buffers. */
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop our reference; last put runs rshim_pcie_delete(). */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module init: register the PCI driver with the PCI core. */
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unregister the PCI driver (remove() runs per device). */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Mellanox Address & Data Capabilities */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+/* Per-device state for the livefish backend (no BAR mapping needed). */
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	/* Owning PCI device; set in probe. */
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/* Mechanism to access the CR space using hidden PCI capabilities */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Set LSB to indicate a read operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset | MELLANOX_CAP_READ);
+	if (retval)
+		return retval;
+
+	/* Read result from MELLANOX_DATA */
+	retval = pci_read_config_dword(pci_dev, MELLANOX_DATA,
+				result);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* CR-space write via the hidden PCI capability (data first, then addr). */
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int retval;
+
+	/* Write data to MELLANOX_DATA */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_DATA,
+				value);
+	if (retval)
+		return retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Leave LSB clear to indicate a write operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Acquire and release the TRIO_CR_GW_LOCK. */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	/* Wait until TRIO_CR_GW_LOCK is free */
+	do {
+		retval = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK,
+				&read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/*
+	 * Acquire TRIO_CR_GW_LOCK.
+	 * NOTE(review): the poll-then-write sequence is not atomic; this
+	 * presumably relies on hardware lock semantics or a single-accessor
+	 * assumption -- confirm against the TRIO_CR_GATEWAY docs.
+	 */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_ACQUIRED);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Release the TRIO_CR_GW_LOCK acquired by trio_cr_gw_lock_acquire(). */
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	int retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_RELEASE);
+
+	return retval;
+}
+
+/*
+ * Mechanism to access the RShim from the CR space using the
+ * TRIO_CR_GATEWAY.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval, rel;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		goto unlock;
+
+	/* Set TRIO_CR_GW_READ_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		goto unlock;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		goto unlock;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+
+unlock:
+	/*
+	 * Fix: always release TRIO_CR_GW_LOCK.  The original returned
+	 * early on error with the hardware lock still held, wedging all
+	 * subsequent gateway accesses.  Report the first failure.
+	 */
+	rel = trio_cr_gw_lock_release(pci_dev);
+
+	return retval ? retval : rel;
+}
+
+/* 4-byte RShim write from CR space via the TRIO_CR_GATEWAY. */
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval, rel;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		goto unlock;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		goto unlock;
+
+	/* Set TRIO_CR_GW_WRITE_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		goto unlock;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+
+unlock:
+	/*
+	 * Fix: always release TRIO_CR_GW_LOCK.  The original returned
+	 * early on error with the hardware lock still held, wedging all
+	 * subsequent gateway accesses.  Report the first failure.
+	 */
+	rel = trio_cr_gw_lock_release(pci_dev);
+
+	return retval ? retval : rel;
+}
+
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+		/*
+		 * Fix: test only the PENDING bit.  The original masked with
+		 * (RSH_CHANNEL1_BASE + RSH_BYTE_ACC_PENDING), mixing an
+		 * address base into a status bitmask and spinning on
+		 * unrelated CTL bits (compare rshim_pcie.c).
+		 */
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/*
+	 * Read the first 32-bit word from RSH_BYTE_ACC_RDAT.  It is placed
+	 * in the upper half of read_result; the be64_to_cpu() below fixes
+	 * the byte ordering (mirrors rshim_pcie.c).
+	 */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read the second 32-bit word from RSH_BYTE_ACC_RDAT. */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/* 8-byte RShim write via two 4-byte Byte Access Widget writes. */
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/*
+	 * Write control bits to RSH_BYTE_ACC_CTL again.
+	 * NOTE(review): repeats the identical write above; presumably part
+	 * of the widget's required sequence -- confirm, or drop one.
+	 */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write lower 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	/* Both halves go to the same addr; the FIFO pairs them up. */
+	int retval;
+
+	/* Write lower 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* A read drains posted writes, so reset the write throttle counter. */
+	dev->write_count = 0;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	/*
+	 * NOTE(review): byte-swapping an *address* with be32_to_cpu() is
+	 * unusual; presumably the CR gateway expects big-endian addresses
+	 * -- confirm against the gateway documentation.
+	 */
+	addr = be32_to_cpu(addr);
+
+	retval = rshim_byte_acc_read(pci_dev, addr, result);
+
+	return retval;
+}
+
+/* 8-byte RShim write with throttling; boot-stream writes bypass the widget. */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	/* Boot FIFO addresses are not byte-swapped; see rshim_pcie_read. */
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		/* The read also resets write_count back to 0. */
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+/* kref release callback: tear down the backend and free the device. */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+	int i;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+
+	/*
+	 * Fix: clear the instances[] slot before freeing the device.
+	 * The original left a dangling pointer there, so the slot was
+	 * never reusable and a later probe could see freed memory.
+	 */
+	for (i = 0; i < MAX_DEV_COUNT; i++) {
+		if (instances[i] == dev) {
+			instances[i] = NULL;
+			break;
+		}
+	}
+	kfree(dev);
+}
+
+/*
+ * Probe routine.  Allocates (or reuses) the backend state and registers
+ * the livefish backend with the rshim core.
+ * Fixes vs. original: rshim_fifo_alloc() was called twice; the allocfail,
+ * rshim_register and rshim_notify failure paths returned 0; and the
+ * device name was not disowned after a successful rshim_register(),
+ * risking a free of memory still referenced by bd->dev_name (the
+ * rshim_pcie.c probe already handles all three correctly).
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	/* Find a free slot in the per-driver instance table. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFOs (the original called this twice). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		pr_err("Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->read_buf;
+
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->write_buf;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		/* Fix: report the failure instead of returning 0. */
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			/* Fix: propagate the error instead of returning 0. */
+			err = retval;
+			rshim_unlock();
+			goto register_failed;
+		}
+		/* Fix: name ownership passed to the backend; don't free. */
+		pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		/* Fix: propagate the error instead of returning 0. */
+		err = retval;
+		goto register_failed;
+	}
+
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Fix: guard against a probe that failed before drvdata was set;
+	 * rshim_pcie.c has this check but it was missing here, leading to
+	 * a NULL dereference below.
+	 */
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	/* Detach, drain the work queue and free all per-device buffers. */
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop our reference; last put runs rshim_pcie_delete(). */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module init: register the livefish PCI driver with the PCI core. */
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unregister the PCI driver (remove() runs per device). */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..74f8e30
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+/* _64bit(x): append the 64-bit integer suffix appropriate for the build. */
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#ifdef __tile__
+#define _64bit(x) x ## UL
+#else /* __tile__ */
+#define _64bit(x) x ## ULL
+#endif /* __tile__ */
+#endif /* __ASSEMBLER__ */
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#ifndef __DOXYGEN__
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__DOXYGEN__) */
+#endif /* !defined(__RSHIM_REGS_H__) */
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Disable RShim access (module parameter, read-only; kept for compat). */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs. */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/* Structure to hold all of our device specific stuff. */
+struct rshim_usb {
+	/* RShim backend structure.  Must stay first: container_of() is
+	 * used throughout to map a backend pointer back to this struct. */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/* Control data; buffer for rshim register control transfers. */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode. */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/* Table of devices that work with this driver; const per kernel convention. */
+static const struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/* Arguments to an urb completion handler. */
+#define URB_COMP_ARGS struct urb *urb
+
+/* Final kref release: deregister the backend and free the device. */
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd = container_of(kref, struct rshim_backend,
+						kref);
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	rshim_deregister(bd);
+	kfree(dev);
+}
+
+/* Rshim read/write routines */
+
+/*
+ * Read one 64-bit RShim register via a blocking USB control transfer.
+ * Returns 0 on success, -ENODEV if the rshim interface is gone, the USB
+ * error on failure, or -EBADE/-EBADR for long/short transfers.
+ */
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read and endian conversion. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	/*
+	 * The RShim HW puts bytes on the wire in little-endian order
+	 * regardless of endianness settings either in the host or the ARM
+	 * cores.
+	 */
+	*result = le64_to_cpu(dev->ctrl_data);
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/*
+ * Write one 64-bit RShim register via a blocking USB control transfer.
+ * Mirror of rshim_usb_read_rshim(); same return conventions.
+ */
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_OUT,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Boot routines */
+
+/*
+ * Completion handler for boot-stream urbs: log abnormal status and wake
+ * the waiter in rshim_usb_boot_write().
+ */
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT)
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	else if (urb->status)
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+/*
+ * Write one chunk of the boot stream to the boot FIFO endpoint.
+ * Called with bd->mutex held; temporarily drops it while waiting for urb
+ * completion so the user can interrupt a stuck transfer.  Returns the
+ * number of bytes written, or a negative error if nothing was written.
+ */
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause stuck.
+	 * Here we release the mutex so user could use 'ctrl + c' to terminate
+	 * the current write. Once the boot file is opened again, the
+	 * outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		/* Interrupted: cancel the urb, report what did go out. */
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		/* Propagate urb status only if no earlier error was seen. */
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+/*
+ * Completion handler shared by the bulk-in read urb and the interrupt
+ * urb (distinguished by dev->read_urb_is_intr).  Serializes FIFO state
+ * updates with bd->spinlock and retries transient errors up to
+ * READ_RETRIES times.
+ */
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Start the next inbound USB transfer for the tmfifo: a bulk read when
+ * data is known to be pending (interrupt count or leftover read bytes),
+ * otherwise an interrupt-in poll that reports when data arrives.  Both
+ * complete via rshim_usb_fifo_read_callback().
+ *
+ * Fixes vs. original: debug messages carried the wrong function names
+ * ("fifo_drain", "fifo_read_callback") and claimed the urb was submitted
+ * even when usb_submit_urb() had just failed.
+ */
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read: failed submitting read "
+			      "urb, error %d", retval);
+		} else {
+			pr_debug("fifo_read: submitted read urb\n");
+		}
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read: failed submitting "
+			      "interrupt urb, error %d", retval);
+		} else {
+			pr_debug("fifo_read: submitted interrupt urb\n");
+		}
+	}
+}
+
+/*
+ * Completion handler for the bulk-out tmfifo write urb.  Serializes FIFO
+ * state with bd->spinlock, wakes writers on success, and retries
+ * transient errors up to WRITE_RETRIES times.
+ */
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Submit a bulk-out write of 'count' bytes (must be a multiple of 8)
+ * from the coherent write buffer to the tmfifo endpoint.
+ * Returns 0 on successful submission.
+ * NOTE(review): returns -1 (not the usb_submit_urb error) on failure;
+ * callers appear to only test for nonzero — confirm before changing.
+ */
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/*
+ * These make the endpoint test code in rshim_usb_probe() a lot cleaner.
+ * All arguments are fully parenthesized so any expression may be passed.
+ */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+#define max_pkt(ep)    le16_to_cpu((ep)->wMaxPacketSize)
+#define ep_addr(ep)    ((ep)->bEndpointAddress)
+
+/* Backend read hook: start a FIFO read for net/console device types. */
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	if (devtype == RSH_DEV_TYPE_NET || devtype == RSH_DEV_TYPE_CONSOLE) {
+		rshim_usb_fifo_read(dev, buf, count);
+		return 0;
+	}
+
+	pr_err("bad devtype %d\n", devtype);
+	return -EINVAL;
+}
+
+/*
+ * Backend write hook: dispatch to the tmfifo write path for net/console
+ * traffic, or the boot-stream write path for the boot device.
+ */
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		return rshim_usb_fifo_write(dev, buf, count);
+
+	case RSH_DEV_TYPE_BOOT:
+		return rshim_usb_boot_write(dev, buf, count);
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * Backend cancel hook: kill the urb corresponding to the outstanding
+ * request for the given device type and direction.
+ */
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (is_write)
+			usb_kill_urb(dev->write_urb);
+		else
+			usb_kill_urb(dev->read_or_intr_urb);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		usb_kill_urb(dev->boot_urb);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		break;
+	}
+}
+
+/*
+ * Probe one of the two USB interfaces (rshim/boot, subclass 0, or
+ * tmfifo, subclass 1) of a BlueField device.  Both interfaces share a
+ * single rshim_usb structure, located by device pathname via
+ * rshim_find(); the structure and its buffers are created on whichever
+ * interface probes first.
+ */
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		/* Name already owned by the existing backend. */
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		/* Wire up the backend callbacks; bd takes over usb_dev_name. */
+		bd = &dev->bd;
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	/* NOTE(review): '== 0' is a pointer-vs-0 compare; '!ptr' is clearer. */
+	allocfail |= bd->read_buf == 0;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= dev->intr_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->read_or_intr_urb == 0;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->write_urb == 0;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		/* Unknown subclass; not ours. */
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	/* Free everything this probe may have allocated; kref drop last. */
+	if (dev) {
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	kfree(usb_dev_name);
+	return retval;
+}
+
+/*
+ * USB disconnect callback: detach the backend, tear down the per-
+ * interface USB state, and drop the reference taken at probe time.
+ */
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;	/* set when the console worker couldn't be canceled */
+
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		/* Kill in-flight URBs before freeing them and their buffers. */
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	/* Drop the USB device only when both interfaces are gone. */
+	if (!bd->has_rshim && !bd->has_tm) {
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+/* USB driver glue: probe/disconnect entry points and matching ID table. */
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+/* Module init: register the driver; returns usb_register()'s result. */
+static int __init rshim_usb_init(void)
+{
+	int result;
+
+	/* Register this driver with the USB subsystem. */
+	result = usb_register(&rshim_usb_driver);
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unregister from the USB subsystem. */
+static void __exit rshim_usb_exit(void)
+{
+	/* Deregister this driver with the USB subsystem. */
+	usb_deregister(&rshim_usb_driver);
+}
+
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-11-01 16:23   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:23 UTC (permalink / raw)
  To: y, Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Structure to maintain the ring state.  Each tmfifo_vdev owns one of
+ * these per direction (TMFIFO_VRING_RX / TMFIFO_VRING_TX).
+ */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/*
+ * Interrupt types.  Also used as indices into tmfifo->irq[] and as bit
+ * positions in tmfifo->pend_events.
+ */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx); index into tmfifo_vdev->vrings[]. */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+/*
+ * State of one virtio device (console or network) multiplexed over the
+ * shared TM FIFO.  The tx_buf circular buffer is only allocated for the
+ * console device (see tmfifo_create_vdev()).
+ */
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring, indexed by is_rx */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+/*
+ * 8-byte message header transferred as a single FIFO word ahead of each
+ * packet.  'len' is big-endian (htons/ntohs at the call sites).
+ */
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/*
+ * Return the available Tx buffer space.
+ * NOTE(review): 8 bytes (one u64 FIFO word) are held back, presumably so
+ * that tx_head == tx_tail can only mean "empty" -- confirm against the
+ * push/pop call sites.
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer tail pointer after pushing data, wrapping at the end. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer head pointer after popping data, wrapping at the end. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Allocate DMA-coherent ring buffers for both vrings of a vdev.
+ * Returns 0 on success.
+ * NOTE(review): returns -EINVAL on allocation failure where -ENOMEM
+ * would be conventional; rings allocated before a failure are not freed
+ * here -- presumably the caller's error path does that, confirm.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free the ring buffers of the fifo device, deleting the associated
+ * virtqueues as well.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			/* The vq only exists when its ring buffer does. */
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			/*
+			 * The dev_id cookie is fifo + irq index and must
+			 * match what was passed at request time; see
+			 * tmfifo_irq_handler().
+			 */
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Interrupt handler.  dev_id encodes both the fifo pointer and the irq
+ * index (fifo + i): the low bits recover the interrupt type and the base
+ * recovers the tmfifo.  NOTE(review): this relies on the tmfifo
+ * allocation being at least pointer-aligned -- confirm.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Defer the actual Rx/Tx work to tmfifo_work_handler(). */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Device release callback; nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring, or NULL when no new
+ * buffers are available.  Advances vring->next_avail.
+ * NOTE(review): next_avail is compared with vr->avail->idx without a
+ * virtio16_to_cpu() conversion -- confirm this is safe for the
+ * endianness configurations this transport supports.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a descriptor chain to the used ring and publish it.
+ * NOTE(review): .len goes through cpu_to_virtio32() but .id and the
+ * used->idx increment do not get the matching conversions -- confirm
+ * whether that is intentional for this transport.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/*
+ * Get the total length of a descriptor chain by walking the VRING_DESC_F_NEXT
+ * links starting at 'desc'.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the current packet back to the used ring.  If *desc and the
+ * saved desc_head identify an in-flight packet, release that one with
+ * the recorded pkt_len; otherwise pop the next available chain and
+ * release it with its computed length.  Clears *desc and pkt_len.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * House-keeping timer: periodically kicks the work handler for both
+ * directions and re-arms itself every tmfifo_timer_interval jiffies.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output into the console Tx circular buffer.
+ * Called with fifo->spin_lock held (see tmfifo_virtio_notify()).
+ * Each packet is stored as an 8-byte tmfifo_msg_hdr followed by the
+ * payload, padded to an 8-byte boundary.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy the chain into tx_buf, handling the wrap-around. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ * @vq: the virtqueue to serve.
+ * @is_rx: true to drain the Rx FIFO into the ring, false to push ring
+ *         data out through the Tx FIFO; also used as the index into
+ *         fifo->vring[] tracking the partially-processed ring.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in the FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/*
+				 * NOTE(review): writeq() already performs
+				 * little-endian ordering; combining it with
+				 * cpu_to_le64() may double-swap on
+				 * big-endian hosts -- confirm.
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				/* NOTE(review): same endianness concern as
+				 * the writeq() above.
+				 */
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	/* Always reports success to the virtio core. */
+	return true;
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.  Serves the Tx rings
+ * of all vdevs when TM_TX_LWM_IRQ is pending, then the Rx rings when
+ * TM_RX_HWM_IRQ is pending, under fifo->lock.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits offered by this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use: record what the core negotiated. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs(), releasing any packet that is
+ * still held by a ring first.
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues, reusing the DMA buffers
+ * allocated in tmfifo_alloc_vrings().  On failure all queues created so
+ * far are torn down.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link vq and vring both ways for the rxtx path. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the device status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the device status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device: just clears the status byte for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read the value of a configuration field; bounds-checked (including
+ * offset + len overflow) against the config union.
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Reject out-of-bounds (and offset + len overflowing) accesses. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Fix copy-pasted message: this is the set accessor. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations.
+ * get_features/finalize_features are defined earlier in this file; the
+ * remaining callbacks are the accessors above.
+ */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo.
+ * @fifo:    owning tmfifo device
+ * @vdev_id: virtio device id (VIRTIO_ID_CONSOLE / VIRTIO_ID_NET)
+ * @features: virtio feature bits advertised to the driver
+ * @config:  optional initial config-space content (size bytes), may be NULL
+ * Returns 0 on success or a negative errno.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/* Set the entry before tx_buf allocation so the unwind path can
+	 * use tmfifo_free_vrings(), which looks up fifo->vdev[vdev_id].
+	 */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Console output is staged through an intermediate Tx buffer;
+	 * check the allocation instead of dereferencing NULL later.
+	 */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vring_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	kfree(tm_vdev->tx_buf);	/* NULL for non-console vdevs */
+vring_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+vdev_fail:
+	kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo.
+ * Safe to call for an id that was never created (entry is NULL).
+ * Always returns 0.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		/* tx_buf is only allocated for the console vdev;
+		 * kfree(NULL) is a no-op for the others.
+		 */
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function.
+ * Also invoked from the probe error path, so every teardown step must
+ * tolerate a partially initialized (or NULL) drvdata.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Mark not ready first so new activity stops before the
+		 * timer/irq/work teardown below.
+		 */
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	/* NOTE(review): if probe failed before request_mem_region(), these
+	 * regions were never claimed by us — verify release is still safe
+	 * on that path.
+	 */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable; leave 'mac' untouched if it is absent or has the wrong size.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	u8 cfg_mac[6];
+	unsigned long len = sizeof(cfg_mac);
+	efi_status_t rc;
+
+	rc = efi.get_variable(name, &guid, NULL, &len, cfg_mac);
+	if (rc == EFI_SUCCESS && len == sizeof(cfg_mac))
+		memcpy(mac, cfg_mac, sizeof(cfg_mac));
+}
+
+/* Probe the TMFIFO.
+ * Maps the Rx/Tx FIFO registers, hooks up the four watermark irqs,
+ * programs the FIFO watermarks and registers the console and network
+ * virtio devices. Returns 0 on success or a negative errno.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resources of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/* Don't go through tmfifo_remove() here: no region has
+		 * been requested yet, so it must not be released.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	/* Initialize the mutex before any 'goto err' can reach
+	 * tmfifo_remove() -> tmfifo_delete_vdev(), which takes it.
+	 */
+	mutex_init(&fifo->lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() installs the callback; no extra assignment needed. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		/* platform_get_irq() returns a negative errno on failure. */
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Unable to get irq %d\n", i);
+			goto err;
+		}
+		fifo->irq[i] = ret;
+		/* dev_id encodes the irq index in the pointer low bits. */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/* Set ret explicitly on each failure below; it may hold 0 from a
+	 * successful call above.
+	 */
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver; the device is matched via either DT or ACPI. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int rc = platform_driver_register(&tmfifo_driver);
+
+	if (rc)
+		pr_err("Failed to register tmfifo driver.\n");
+	return rc;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+/*
+ * Register offsets are relative to the mapped Rx or Tx FIFO base.
+ * For each field: _SHIFT/_WIDTH give its bit position, _RMASK is the
+ * right-justified mask and _MASK is the in-place mask.
+ */
+
+/* Tx FIFO data word: a 64-bit write pushes one word into the FIFO. */
+#define TMFIFO_TX_DATA 0x0
+
+/* Tx FIFO status: COUNT is the number of words currently in the FIFO. */
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+/* Tx FIFO control: low/high watermarks and total number of entries. */
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+/* Rx FIFO data word: a 64-bit read pops one word from the FIFO. */
+#define TMFIFO_RX_DATA 0x0
+
+/* Rx FIFO status: COUNT is the number of words available to read. */
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+/* Rx FIFO control: low/high watermarks and total number of entries. */
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-11-01 16:23   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:23 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size (number of descriptors per ring). */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size in bytes. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval in jiffies (HZ / 10 = 100 ms);
+ * runtime-writable via sysfs (mode 0644).
+ */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock serializing device add/remove. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size; read-only after module load (mode 0444). */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Forward declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO; indexed by virtio device id. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state.
+ * One instance exists per (vdev, direction); see tmfifo_vdev.vrings[].
+ * cur_len/rem_len/pkt_len are byte counts.
+ */
+struct tmfifo_vring {
+	void *va;			/* virtual address of the ring memory */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc chain head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size (descriptors) */
+	int align;			/* vring alignment */
+	int id;				/* vring id (TMFIFO_VRING_RX/TX) */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types.
+ * The index i is encoded into the request_irq() dev_id pointer as
+ * (u8 *)fifo + i, so these values must stay < sizeof(void *).
+ */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx); index into tmfifo_vdev.vrings[]. */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+/* Per-virtio-device state embedded around the virtio_device. */
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;			/* virtio status byte */
+	u64 features;			/* advertised feature bits */
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	/* Circular Tx staging buffer; only allocated for the console. */
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head (byte offset) */
+	u32 tx_tail;			/* tx buffer tail (byte offset) */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;		/* serializes vdev create/delete */
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	/* Pending bits for deferred process; bit i is TM_*_IRQ index i. */
+	unsigned long pend_events;
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	/* In-progress rings, indexed by is_rx: [0] Tx, [1] Rx. */
+	struct tmfifo_vring *vring[2];
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* protects console tx_buf head/tail */
+};
+
+/* 8-byte message header, transferred as one FIFO word ('data'). */
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type (virtio device id) */
+		__be16 len;		/* payload length, big-endian */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured
+ * (see tmfifo_get_cfg_mac()).
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features (bit numbers from virtio_net.h). */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Number of bytes that can still be pushed into the Tx buffer;
+ * 8 bytes are kept in reserve so head == tail only ever means "empty".
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	if (vdev->tx_tail >= vdev->tx_head)
+		return TMFIFO_CONS_TX_BUF_SIZE - 8 -
+		       (vdev->tx_tail - vdev->tx_head);
+
+	return vdev->tx_head - vdev->tx_tail - 8;
+}
+
+/* Advance the Tx buffer tail by 'len' bytes, wrapping at the buffer end. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	u32 tail = vdev->tx_tail + len;
+
+	if (tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		tail -= TMFIFO_CONS_TX_BUF_SIZE;
+	vdev->tx_tail = tail;
+}
+
+/* Advance the Tx buffer head by 'len' bytes, wrapping at the buffer end. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	u32 head = vdev->tx_head + len;
+
+	if (head >= TMFIFO_CONS_TX_BUF_SIZE)
+		head -= TMFIFO_CONS_TX_BUF_SIZE;
+	vdev->tx_head = head;
+}
+
+/* Allocate vrings for the fifo.
+ * Returns 0 on success or -ENOMEM on failure. On failure every ring
+ * allocated so far is freed again, so the caller only needs to free
+ * the tmfifo_vdev itself.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			goto err_unwind;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+
+err_unwind:
+	/* Free the rings that were successfully allocated before. */
+	while (--i >= 0) {
+		vring = &tm_vdev->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+				  vring->va, vring->dma);
+		vring->va = NULL;
+	}
+	return -ENOMEM;
+}
+
+/* Free vrings of the fifo device.
+ * The virtqueue is deleted before its ring memory is freed, since the
+ * virtqueue references that memory; it is also deleted even if the ring
+ * memory pointer happens to be NULL.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Delete the virtqueue first. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		if (vring->va) {
+			size = PAGE_ALIGN(vring_size(vring->size,
+						     vring->align));
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/* Release all interrupts that were requested for the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		int irq = fifo->irq[i];
+
+		if (!irq)
+			continue;
+		fifo->irq[i] = 0;
+		disable_irq(irq);
+		/* dev_id must match what was passed to request_irq(). */
+		free_irq(irq, (u8 *)fifo + i);
+	}
+}
+
+/* Interrupt handler.
+ * dev_id was registered as (u8 *)fifo + i, so the irq index is recovered
+ * from the pointer's low bits; this relies on the fifo allocation being
+ * at least sizeof(void *)-aligned (kzalloc guarantees this).
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Only schedule the work if this event wasn't already pending. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now; the tmfifo_vdev itself is freed in
+ * tmfifo_delete_vdev() after unregister_virtio_device() returns.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring.
+ * Returns the head descriptor of the next available chain, or NULL when
+ * every descriptor the driver made available has been consumed.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	/* NOTE(review): next_avail (__virtio16) is compared with avail->idx
+	 * without byte-swapping — fine on little-endian; confirm whether
+	 * big-endian guests need to be supported.
+	 */
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Return a completed descriptor chain to the used ring.
+ * 'desc' is the chain head; 'len' is the number of bytes written/read.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Sum the byte lengths of every descriptor in a chain.
+ * Returns 0 if 'desc' is NULL.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 total = 0;
+
+	while (desc) {
+		total += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)];
+	}
+
+	return total;
+}
+
+/* Release the current packet back to the used ring.
+ * If *desc and the saved chain head are set, the saved head and pkt_len
+ * are used; otherwise the next available chain is fetched and released
+ * directly. *desc is cleared and pkt_len reset afterwards.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* Periodic house-keeping timer callback. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Poll the Rx FIFO in case an interrupt was missed or leftover
+	 * bytes are stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Kick the Tx path in case virtio has queued too many packets
+	 * and is waiting for buffers to be returned.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	/* Re-arm for the next interval. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output.
+ * Drains completed console Tx descriptor chains into the circular
+ * cons->tx_buf, prefixing each packet with an 8-byte tmfifo_msg_hdr and
+ * padding each packet to an 8-byte boundary so the header of the next
+ * packet never wraps. Chains that don't fit are released unsent.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* tx_tail stays 8-byte aligned (see padding below), so the
+		 * 8-byte header never wraps around the buffer end.
+		 */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy each descriptor's payload, wrapping if needed. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue.
+ * The FIFO is word-based (64 bits per read/write). On Rx, the first word
+ * of a packet is a tmfifo_msg_hdr whose 'type' selects which vdev's ring
+ * the payload goes to; on Tx, such a header is emitted first. Progress
+ * state (current desc, cur_len, rem_len) is saved in the tmfifo_vring so
+ * a packet can straddle multiple invocations.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	/* Resume from the desc saved by a previous invocation, if any. */
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			/* Drain tx_buf one 64-bit word at a time; the
+			 * spin lock protects head/tail against the
+			 * producer in tmfifo_console_output().
+			 */
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				/* Re-read the FIFO space when exhausted. */
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			/* Mark this vring as owning the direction until the
+			 * packet completes.
+			 */
+			fifo->vring[is_rx] = vring;
+
+			/* The packet is for another vdev: let that vdev's
+			 * Rx vring pick it up on its own invocation.
+			 */
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * Notify callback, invoked by the virtio core when new buffers are
+ * posted on a virtqueue. Always returns true (notification accepted).
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit so the worker starts Rx processing. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			/* Stage console data under the spin lock, then kick
+			 * the worker to drain it into the FIFO.
+			 */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ *
+ * Runs in process context and serializes against vdev create/delete via
+ * fifo->lock. The pending-event bits are set by the interrupt handler,
+ * the house-keeping timer and the virtqueue notify callback.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx: serve the Tx ring of every vdev when the LWM event fired. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx: serve the Rx ring of every vdev when the HWM event fired. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits this tmfifo virtual device supports. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->features;
+}
+
+/* Record the feature bits negotiated by the virtio core. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->features =
+		vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Any in-flight packet on a vring is returned to the used ring first,
+ * then the virtqueue itself is deleted.
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		/* Clear vring->vq before deleting so it is not freed twice. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * The vring memory itself was allocated by tmfifo_alloc_vrings(); here
+ * it is zeroed and wrapped in a virtqueue object. On any failure the
+ * queues created so far are torn down again. Returns 0 or a negative
+ * errno.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		/* NOTE(review): the two 'false' args look like the
+		 * weak_barriers/context flags -- confirm against the
+		 * vring_new_virtqueue() prototype of the target kernel.
+		 */
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the current device status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->status;
+}
+
+/* Store the device status byte written by the virtio core. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = status;
+}
+
+/* Reset the device by clearing the status byte. Nothing else for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ *
+ * Copies 'len' bytes at 'offset' of the per-device config space into
+ * 'buf'. Out-of-range requests are rejected with an error message and
+ * 'buf' is left untouched.
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* The 'offset + len < len' test also catches integer wrap-around. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ *
+ * Copies 'len' bytes from 'buf' into the per-device config space at
+ * 'offset'. Out-of-range requests are rejected with an error message.
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* The 'offset + len < len' test also catches integer wrap-around. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Fix: message used to say "virtio_get" (copy-paste). */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * 'features' are the virtio feature bits to advertise; 'config'/'size'
+ * optionally seed the virtio config space. Returns 0 on success or a
+ * negative errno (-EEXIST, -ENOMEM, or a register_virtio_device() error).
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+	/* The console needs a staging buffer for non-blocking Tx. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		/* Fix: the allocation result was previously unchecked. */
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto register_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+	/* Fix: tx_buf used to leak when registration failed (NULL-safe). */
+	kfree(tm_vdev->tx_buf);
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo.
+ *
+ * Unregisters the virtio device (if it exists), frees its vrings and
+ * the console Tx buffer. Always returns 0; a missing vdev is a no-op.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);	/* NULL for non-console vdevs */
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function.
+ *
+ * Tears down in reverse order of probe: mark not-ready, stop the timer,
+ * free the irqs, cancel deferred work, delete the virtual devices, then
+ * unmap and release the FIFO register regions. Also used as the probe
+ * error-path cleanup.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	/*
+	 * NOTE(review): this assumes the mem regions were requested before
+	 * any probe 'goto err' -- confirm for every probe error path.
+	 */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable. 'mac' is left unchanged when EFI runtime services are not
+ * available, or the variable is missing or has an unexpected size.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/* Fix: do not call into EFI when runtime services are unusable. */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx FIFO register blocks, requests the four interrupts,
+ * programs the FIFO watermarks, and creates the console and network
+ * virtio devices. Returns 0 on success or a negative errno.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/*
+		 * Fix: go to early_err (not err) so tmfifo_remove() does
+		 * not release mem regions that were never requested.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	/*
+	 * Fix: initialize the mutex before any 'goto err', since the error
+	 * path (tmfifo_remove -> tmfifo_delete_vdev) locks it.
+	 */
+	mutex_init(&fifo->lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/*
+	 * timer_setup() already installs the callback; the old explicit
+	 * 'fifo->timer.function = tmfifo_timer' assignment was redundant.
+	 */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		/* dev_id encodes the irq index in the pointer's low bits. */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/*
+	 * Fix: 'ret' held 0 from the last successful request_irq(), so an
+	 * ioremap() failure used to make probe return success. Set an
+	 * error code before the checks below.
+	 */
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module entry point: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret = platform_driver_register(&tmfifo_driver);
+
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/*
+ * Return the available Tx buffer space.
+ * 8 bytes are kept back from the ring capacity -- presumably to
+ * distinguish full from empty and/or reserve header room; TODO confirm.
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Allocate coherent DMA memory for all vrings (Rx & Tx) of a device.
+ *
+ * On failure, vrings already allocated by this call are freed again so
+ * the caller can simply kfree() the vdev. Returns 0 or -ENOMEM.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			/* Fix: unwind partial allocations (old code leaked
+			 * them) and return -ENOMEM instead of -EINVAL.
+			 */
+			while (--i >= 0) {
+				vring = &tm_vdev->vrings[i];
+				size = PAGE_ALIGN(vring_size(vring->size,
+							     vring->align));
+				dma_free_coherent(tm_vdev->vdev.dev.parent,
+						  size, vring->va, vring->dma);
+				vring->va = NULL;
+			}
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Each irq is registered with dev_id = (u8 *)fifo + i (i < TM_IRQ_CNT),
+ * so the irq index is recovered from the pointer's low bits here. This
+ * relies on the fifo struct being at least sizeof(void *) aligned and
+ * TM_IRQ_CNT <= sizeof(void *).
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Mark the event pending and defer the real work to the worker. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when
+ * this side has caught up with the avail index. vring->next_avail tracks
+ * how far the avail ring has been consumed.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a descriptor chain to the used ring with the given byte count.
+ * The used-index increment is what publishes the buffer to the other
+ * side, hence the barrier before it.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Complete the current packet and return its descriptor chain to the
+ * used ring.
+ *
+ * If '*desc' refers to a packet in progress, the saved head and length
+ * (vring->desc_head / vring->pkt_len) are released; otherwise the next
+ * available chain (if any) is fetched and released with its full length.
+ * In both cases '*desc' is cleared and pkt_len reset.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer callback.
+ *
+ * Periodically kicks the worker with both events set: Rx to recover
+ * from a missed interrupt or leftover bytes stuck in the FIFO, Tx to
+ * unblock virtio when many queued packets wait for buffer return.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+	schedule_work(&fifo->work);
+
+	/* Re-arm for the next housekeeping pass. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output.
+ *
+ * Drains the console Tx virtqueue into the circular tx_buf staging
+ * buffer: each packet gets an 8-byte tmfifo message header and is
+ * padded to an 8-byte boundary.  Caller holds fifo->spin_lock (see
+ * tmfifo_virtio_notify()).
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space.
+		 * NOTE(review): this check does not account for the up to
+		 * 7 alignment padding bytes pushed below -- confirm that
+		 * tmfifo_vdev_tx_buf_avail() keeps enough slack.
+		 */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Stage the 8-byte message header first. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy the chain's payload, wrapping at the buffer end. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				/* Split copy across the wrap point. */
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue.
+ *
+ * Moves data between the hardware TM FIFO and the virtio descriptor
+ * chains of 'vq': for Rx, 8-byte words are read from the FIFO and
+ * scattered into the current chain; for Tx, the chain is gathered and
+ * written into the FIFO one word at a time.  A partially transferred
+ * packet is parked in vring->desc / cur_len / rem_len so processing
+ * can resume on the next call.  Caller holds fifo->lock (worker) or
+ * serializes via fifo->vring[is_rx].
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	/* Resume from a previously interrupted packet, if any. */
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				/* NOTE(review): writeq() takes a CPU-order
+				 * value and byte-swaps internally where
+				 * needed; combining it with cpu_to_le64()
+				 * double-swaps on big-endian.  A no-op on
+				 * arm64 LE -- confirm intent.
+				 */
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					/* Unknown type: drop this word. */
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				/* Tx: emit the message header word first. */
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			/* cur_len skips the virtio_net header (Rx net). */
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted.
+ *
+ * Runs in the caller's context (possibly with interrupts disabled for
+ * console writes), so the heavy lifting is deferred to the worker.
+ * Always returns true.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			/* Stage output into tx_buf under the spinlock. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Work handler for Rx, Tx or activity monitoring.
+ *
+ * Serves pending Tx first, then Rx, for every registered virtual
+ * device, serialized by fifo->lock.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+	struct tmfifo_vdev *dev;
+	int id;
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx pass. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+	    fifo->irq[TM_TX_LWM_IRQ]) {
+		for (id = 0; id < TMFIFO_VDEV_MAX; id++) {
+			dev = fifo->vdev[id];
+			if (dev)
+				tmfifo_virtio_rxtx(
+					dev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+		}
+	}
+
+	/* Rx pass. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+	    fifo->irq[TM_RX_HWM_IRQ]) {
+		for (id = 0; id < TMFIFO_VDEV_MAX; id++) {
+			dev = fifo->vdev[id];
+			if (dev)
+				tmfifo_virtio_rxtx(
+					dev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits advertised for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	return tm_vdev->features;
+}
+
+/* Record the features the upper layer negotiated; always succeeds. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo_vring *vring;
+	int i;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Return any in-flight packet to the used ring first. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		/* Then tear down the virtqueue itself. */
+		if (vring->vq != NULL) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+	}
+}
+
+/* Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested name on top of the pre-allocated
+ * vring areas; on any failure everything created so far is torn down.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret = -EINVAL;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+
+		vring = &tm_vdev->vrings[i];
+
+		/* Start each queue from a zeroed vring area. */
+		memset(vring->va, 0,
+		       vring_size(vring->size, vring->align));
+
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the device status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	return tm_vdev->status;
+}
+
+/* Write the device status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->status = status;
+}
+
+/* Reset the device: only the status byte is cleared for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field into 'buf'. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+
+	/* Bounds check; 'offset + len < len' detects unsigned overflow. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field from 'buf'. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Bounds check; 'offset + len < len' detects unsigned overflow. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Fixed copy/paste: this is the set path, not virtio_get. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	/* Feature negotiation. */
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	/* Virtqueue setup / teardown. */
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	/* Device status and reset. */
+	.get_status = tmfifo_virtio_get_status,
+	.set_status = tmfifo_virtio_set_status,
+	.reset = tmfifo_virtio_reset,
+	/* Config space accessors. */
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo.
+ *
+ * Allocates the per-device state and vrings, allocates the console Tx
+ * staging buffer when applicable, and registers the virtio device.
+ * Returns 0 on success or a negative errno.  Fixes two defects of the
+ * previous version: the console tx_buf allocation was unchecked, and
+ * tx_buf leaked on the register_virtio_device() failure path.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		/* Fail the create instead of carrying a NULL tx_buf. */
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	kfree(tm_vdev->tx_buf);		/* kfree(NULL) is a no-op */
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo; always returns 0. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		/* Unregister first so no new requests arrive, then free. */
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		fifo->vdev[vdev_id] = NULL;
+		kfree(tm_vdev);
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function.
+ *
+ * Tears down in reverse order of probe: mark not-ready, stop the
+ * timer, free interrupts, flush the worker, delete the virtio devices,
+ * then unmap and release the FIFO MMIO regions.  Also used as the
+ * probe error path.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	/* NOTE(review): regions are released unconditionally; this relies
+	 * on probe() only reaching here after both regions were requested.
+	 */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable.  Leaves 'mac' (the default address) untouched when the
+ * variable is absent, malformed, or EFI runtime services are not
+ * available -- the previous version dereferenced efi.get_variable
+ * unconditionally, which oopses on a non-EFI boot.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/* Keep the default MAC when EFI runtime services are missing. */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx FIFO MMIO regions, requests interrupts, programs the
+ * FIFO watermarks, and creates the console and network virtio devices.
+ * Fixes vs. the previous version: 'ret' is now set on ioremap failure
+ * (it could previously be 0, reporting success for a half-initialized
+ * device); missing resources no longer reach tmfifo_remove(), which
+ * would release regions that were never requested; the redundant
+ * 'fifo->timer.function' assignment after timer_setup() is dropped;
+ * negative platform_get_irq() results are propagated.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, irq, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() installs the callback; no manual assignment. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = irq;
+			goto err;
+		}
+		/* dev_id encodes the event index on top of the fifo
+		 * pointer.  TODO(review): confirm tmfifo_irq_handler
+		 * (defined above) recovers 'i' this way.
+		 */
+		ret = request_irq(irq, tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+		fifo->irq[i] = irq;
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table: binds via the "mellanox,bf-tmfifo" compatible. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},	/* sentinel */
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table: BlueField exposes the TmFifo as device "MLNXBF01". */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},	/* sentinel */
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver; supports both device-tree and ACPI enumeration. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret = platform_driver_register(&tmfifo_driver);
+
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/*
+ * Return the number of bytes available in the console Tx ring buffer.
+ *
+ * 8 bytes are always kept in reserve so that tx_head can never catch up
+ * with tx_tail on a completely full buffer (head == tail means "empty").
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/*
+ * Advance the console Tx buffer tail after producing 'len' bytes,
+ * wrapping around at TMFIFO_CONS_TX_BUF_SIZE.
+ */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Advance the console Tx buffer head after consuming 'len' bytes,
+ * wrapping around at TMFIFO_CONS_TX_BUF_SIZE.
+ */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Allocate the Rx/Tx vrings for one virtual device.
+ *
+ * On failure, any vrings already allocated by earlier iterations of this
+ * call are freed again (and their 'va' cleared), so the caller only needs
+ * to release tm_vdev itself.
+ *
+ * Return: 0 on success, -ENOMEM if a DMA allocation fails.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va)
+			goto err_free;
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+
+err_free:
+	dev_err(tm_vdev->vdev.dev.parent, "vring allocation failed\n");
+	/* Undo the allocations made by the earlier loop iterations. */
+	while (--i >= 0) {
+		vring = &tm_vdev->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+				  vring->va, vring->dma);
+		vring->va = NULL;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Free the vrings of the virtual device identified by vdev_id.
+ * The caller must guarantee fifo->vdev[vdev_id] is non-NULL.
+ *
+ * NOTE(review): the virtqueue is only deleted when 'va' is still set, and
+ * the DMA memory is released before vring_del_virtqueue() is called —
+ * presumably safe because the device is already unregistered at this
+ * point, but worth confirming.
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/*
+ * Release all interrupts of the fifo device.
+ *
+ * The cookie passed to free_irq() must match what was used in
+ * request_irq(): the fifo pointer offset by the irq index, so the
+ * handler can recover the index from the low pointer bits.
+ */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			/* Clear first so no new work is scheduled for it. */
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/*
+ * Interrupt handler for all four FIFO interrupts.
+ *
+ * dev_id is the fifo pointer plus the interrupt index (0..TM_IRQ_CNT-1),
+ * as registered in probe; the index is recovered from the low bits and
+ * the base pointer by subtracting it back out. This relies on the fifo
+ * allocation being at least pointer-aligned.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	/* Defer the actual Rx/Tx processing to the work handler. */
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Device release callback for the embedded virtio devices. The memory is
+ * owned and freed by tmfifo_delete_vdev(), so nothing to do here.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the head descriptor of the next available chain from the vring,
+ * or NULL if no new buffer has been posted by the driver.
+ *
+ * The driver consumes the avail ring directly (it plays the "device"
+ * role of the virtio transport), tracking its own position in
+ * vring->next_avail rather than using the virtqueue API.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a completed descriptor chain to the driver by placing it on the
+ * used ring with the number of bytes written ('len').
+ *
+ * NOTE(review): 'id' is stored and 'used->idx' incremented without
+ * virtio endian conversion, unlike 'len'. Harmless on little-endian
+ * ARM64 where this runs, but confirm before enabling big-endian builds.
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/*
+ * Walk a descriptor chain starting at 'desc' and return the sum of the
+ * descriptor lengths, i.e. the total packet length in bytes.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		/* Stop at the last descriptor of the chain. */
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the current packet back to the driver via the used ring.
+ *
+ * If a partially-processed chain is tracked in '*desc'/'vring->desc_head',
+ * that chain (with the previously computed vring->pkt_len) is released;
+ * otherwise the next available chain is fetched and released instead.
+ * '*desc' is cleared and vring->pkt_len reset in either case.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * House-keeping timer, re-armed every tmfifo_timer_interval jiffies.
+ * It unconditionally kicks the work handler for both directions so the
+ * driver makes progress even if an interrupt was missed.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	/* Re-arm for the next poll period. */
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Drain the console Tx virtqueue into the intermediate Tx ring buffer.
+ *
+ * Each packet is prefixed with an 8-byte tmfifo_msg_hdr and padded to an
+ * 8-byte boundary so the FIFO writer can always emit whole 64-bit words.
+ * If a packet does not fit in the remaining buffer space it is dropped
+ * (its descriptors are released without being copied).
+ *
+ * Caller must hold fifo->spin_lock (see tmfifo_virtio_notify()).
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Write the message header (type + length) first. */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy every descriptor of the chain into the ring buffer. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				/* Wrap-around: copy in two segments. */
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * This is the core state machine that moves 64-bit words between the
+ * hardware FIFO and the virtio descriptor chains. Since the Rx FIFO is
+ * shared by the console and network devices, an incoming packet header
+ * may redirect processing to a different device's Rx vring mid-stream;
+ * fifo->vring[is_rx] records which vring currently "owns" the direction
+ * so a second vring cannot interleave words of a partial packet.
+ *
+ * Progress state (current desc, bytes done, bytes remaining) is kept in
+ * the tmfifo_vring so the function can return when the FIFO runs dry and
+ * resume later from where it stopped.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			/*
+			 * Drain 64-bit words from the console ring buffer
+			 * under the spin lock; the producer side is
+			 * tmfifo_console_output().
+			 */
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				/* Tx: emit the header word into the FIFO. */
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			/*
+			 * The packet belongs to another device; let its own
+			 * vring (now the owner of this direction) resume it.
+			 */
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * Virtqueue notify callback, invoked when the driver posts new buffers.
+ * Rx work is always deferred to the worker; console Tx is served inline
+ * because the console may issue blocking writes with interrupts disabled.
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/*
+ * Work handler for deferred Rx/Tx processing.
+ *
+ * Runs the Tx direction and then the Rx direction for every registered
+ * virtual device, serialized against vdev creation/deletion through
+ * fifo->lock. Events are only handled if the matching irq was obtained.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits advertised for this virtual device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/*
+ * Record the feature set negotiated by the virtio core.
+ * Always succeeds; no feature requires transport-side setup here.
+ */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free the virtqueues created by tmfifo_virtio_find_vqs().
+ * Any packet still in flight on a vring is released back to the used
+ * ring first so the driver does not leak its buffer.
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues on top of the DMA memory
+ * that was pre-allocated in tmfifo_alloc_vrings().
+ *
+ * On any failure the queues created so far are torn down again and an
+ * error code is returned.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link the virtqueue and the ring state both ways. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the virtio status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the virtio status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/*
+ * Reset the device: clear the status byte. There is no hardware state
+ * to reset for this transport.
+ */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read 'len' bytes at 'offset' from the device config space into 'buf'.
+ * Out-of-range accesses (including offset+len integer overflow, caught
+ * by the 'offset + len < len' check) are rejected with an error log.
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write 'len' bytes from 'buf' at 'offset' into the device config space.
+ * Out-of-range accesses (including offset+len integer overflow, caught
+ * by the 'offset + len < len' check) are rejected with an error log.
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Fixed copy/paste: this is the set path, not virtio_get. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create and register one virtio device (console or network) sharing the
+ * TMFIFO.
+ *
+ * @fifo:    the owning tmfifo instance
+ * @vdev_id: VIRTIO_ID_CONSOLE or VIRTIO_ID_NET
+ * @features: virtio feature bits to advertise
+ * @config:  optional initial config-space content (copied), may be NULL
+ * @size:    number of bytes to copy from @config
+ *
+ * Return: 0 on success; -EEXIST if the id is already in use; -ENOMEM or
+ * the register_virtio_device() error otherwise.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		dev_err(&fifo->pdev->dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	/*
+	 * Publish the vdev before allocating vrings so the error path can
+	 * use tmfifo_free_vrings(), which looks the device up by id.
+	 */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	ret = tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "Unable to allocate vring\n");
+		goto vdev_fail;
+	}
+
+	/* The console uses an intermediate Tx buffer; check the allocation. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+	kfree(tm_vdev->tx_buf);
+	kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Unregister and free the virtual device with the given id, if present.
+ * Safe to call for ids that were never created. Always returns 0.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function. Also used as the error-unwind path of probe,
+ * which is why every step tolerates partially-initialized state
+ * (drvdata may be NULL, irqs/mappings may be absent).
+ *
+ * Teardown order: mark not-ready, stop the timer, free irqs, cancel the
+ * work, delete the vdevs, then unmap and release the IO regions.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable into @mac. @mac is left untouched if EFI runtime services
+ * are unavailable, the variable is missing, or its size is wrong.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/*
+	 * efi.get_variable is only valid when EFI runtime services are
+	 * present; calling it on a non-EFI boot would dereference NULL.
+	 */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO platform device: claim and map the Rx/Tx register
+ * windows, request the four interrupts, program the FIFO watermarks,
+ * create the console and network virtio devices, and start the
+ * house-keeping timer.
+ *
+ * Return: 0 on success, negative errno on failure. On failure all
+ * acquired resources are released (via tmfifo_remove() once the memory
+ * regions have been requested).
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/*
+		 * Nothing has been acquired yet, so fail directly instead
+		 * of going through tmfifo_remove(), which would release
+		 * memory regions that were never requested.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "Unable to get irq %d\n", i);
+			goto err;
+		}
+		fifo->irq[i] = ret;
+
+		/* The cookie encodes the irq index in the low pointer bits. */
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	/*
+	 * Reset 'ret' before the ioremap calls: at this point it holds 0
+	 * from the last successful request_irq(), and jumping to 'err'
+	 * with ret == 0 would make a failed probe return success.
+	 */
+	ret = -ENOMEM;
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module entry point: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+/* Module exit point. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 393d2b5..1e28eb6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -117,6 +117,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364..8865ada 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 393d2b5..1e28eb6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -117,6 +117,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364..8865ada 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt numbers of Rx low water mark, Rx high water mark,
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt numbers of Rx low water mark, Rx high water mark,
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 42a2f31..44fb693 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1694,6 +1694,14 @@ S:	Maintained
 F:	drivers/phy/mediatek/
 F:	Documentation/devicetree/bindings/phy/phy-mtk-*
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 42a2f31..44fb693 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1694,6 +1694,14 @@ S:	Maintained
 F:	drivers/phy/mediatek/
 F:	Documentation/devicetree/bindings/phy/phy-mtk-*
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC via an
interface called Rshim. The Rshim driver provides boot, console,
and networking services over this interface. This commit is
the common driver that the other backend (transport) drivers will
use.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig           |    8 +
 drivers/soc/mellanox/Makefile          |    1 +
 drivers/soc/mellanox/host/Makefile     |    2 +
 drivers/soc/mellanox/host/rshim.c      | 2673 ++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h      |  361 +++++
 drivers/soc/mellanox/host/rshim_regs.h |  152 ++
 6 files changed, 3197 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellanox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..e47842f
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * Convert it from little endian after reading from RShim.
+		 * The sender is assumed to have encoded the data as little
+		 * endian, which is usually the default case.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;
+			mutex_lock(&bd->mutex);
+		}
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		char *buf = bd->boot_buf[whichbuf];
+
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/*
+ * Release handler for the boot device file.
+ *
+ * Restores the boot-mode register to eMMC boot, marks the boot file
+ * closed, re-arms the delayed worker, and drops the device/module
+ * references taken at open time.
+ */
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	/* Kick the worker a second from now to resume normal processing. */
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	/* Drop the references taken when the boot file was opened. */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations for the boot device: write-only, plus release. */
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+/*
+ * Open handler for the boot device file.
+ *
+ * Flushes all FIFO state, switches the chip to external boot mode, and
+ * performs a software reset so that data subsequently written to this
+ * file is streamed into the boot FIFO.  With RSH_RESET_MUTEX, every
+ * other device's mutex is held across the reset and released once the
+ * device reprobes (or a 5 second timeout expires).
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have seems to have some sort of bug
+	     * which makes it return more bytes than we actually wrote!  In
+	     * that case we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		/* Clear the open flag while the mutex is still held. */
+		bd->is_boot_open = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	/*
+	 * devs_locked is an unsigned long, so use the "l" builtin here;
+	 * plain __builtin_ctz() only examines the low 32 bits.
+	 */
+	while (devs_locked) {
+		int i = __builtin_ctzl(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Record a FIFO error on the backend and wake every sleeper that may
+ * need to see it: write-completion waiters plus the readable/writable
+ * waitqueues of every channel.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int chan;
+
+	bd->tmfifo_error = err;
+
+	wake_up_interruptible_all(&bd->write_completed);
+
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		wake_up_interruptible_all(&bd->write_fifo[chan].operable);
+		wake_up_interruptible_all(&bd->read_fifo[chan].operable);
+	}
+}
+
+/*
+ * Drain the shared read buffer into the per-channel read FIFOs, and
+ * start another read if more data may be available.
+ *
+ * All callers in this file invoke it with bd->spinlock held (taken via
+ * spin_lock_irq()).  Data for channels that cannot accept it (closed
+ * console channel, no registered network service) is discarded.  When
+ * a complete network packet becomes available, the network service's
+ * rx_notify callback is invoked under rcu_read_lock().
+ */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	/* Nothing to do while the chip is being (re)booted. */
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			/* Payload plus header; padding rounds up to 8 bytes. */
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				/* Unknown message type: discard the buffer. */
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		/* Bounded by packet remainder, buffer tail, and FIFO space. */
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		/* End of packet: skip the padding and flag data available. */
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with the RCU read lock; see the
+			 * comments in the service register/deregister paths.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+/**
+ * rshim_fifo_read() - read bytes from one TmFifo channel.
+ * @bd:       the rshim backend
+ * @buffer:   destination buffer (user or kernel space, per @to_user)
+ * @count:    number of bytes requested
+ * @chan:     channel index (TMFIFO_NET_CHAN or TMFIFO_CONS_CHAN)
+ * @nonblock: if true, return -EAGAIN instead of sleeping when empty
+ * @to_user:  if true, @buffer is a user-space pointer
+ *
+ * Loops until @count bytes have been delivered, blocking (unless
+ * @nonblock) while the channel FIFO is empty.  Returns the number of
+ * bytes read so far if any were transferred before an error, otherwise
+ * a negative errno.
+ */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				/* Poke the drainer once before giving up. */
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				/* User-space callers get -EINTR here. */
+				return to_user ? -EINTR : -ERESTARTSYS;
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		/* pass1 reads to the end of the ring; pass2 wraps around. */
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+/*
+ * Gather pending data from the per-channel write FIFOs into the shared
+ * write buffer and hand it to the underlying transport.
+ *
+ * The channels are serviced round-robin, resuming with the channel
+ * whose packet is partially sent.  Console data is a raw byte stream,
+ * so a message header is synthesized for it here; network packets
+ * already include their header in the FIFO.  All but the final block
+ * of a packet is emitted in 8-byte-aligned chunks, and the final block
+ * is padded up to 8 bytes.  Output is suppressed while the boot file
+ * is open, and while a previous write is still in flight.
+ */
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				/* Need a whole header before proceeding. */
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				/*
+				 * Read the header in place if contiguous,
+				 * otherwise reassemble it across the wrap.
+				 */
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			/* pass1 to the end of the ring; pass2 after wrap. */
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+/*
+ * Allocate the per-channel read/write FIFO buffers.
+ *
+ * Buffers that already exist are reused.  Returns 0 on success, or
+ * nonzero if any allocation failed; partially-allocated buffers are
+ * left in place for rshim_fifo_free() to reclaim.
+ */
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		/* Compare against NULL, not 0, per kernel style. */
+		allocfail |= bd->read_fifo[i].data == NULL;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= bd->write_fifo[i].data == NULL;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+/*
+ * Return the FIFO machinery to its initial state: clear the shared
+ * read/write buffer bookkeeping, drop the READING/WRITING flags, and
+ * empty every per-channel FIFO.  The flag and channel resets happen
+ * under the spinlock so they do not race the I/O completion paths.
+ */
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int chan;
+
+	bd->rx_chan = 0;
+	bd->tx_chan = 0;
+	bd->read_buf_bytes = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_READING | RSH_SFLG_WRITING);
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		write_reset(bd, chan);
+		read_reset(bd, chan);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+/*
+ * Free the per-channel buffers allocated by rshim_fifo_alloc(), reset
+ * all FIFO state, and mark the TmFifo as no longer present.
+ */
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int chan;
+
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		kfree(bd->write_fifo[chan].data);
+		bd->write_fifo[chan].data = NULL;
+		kfree(bd->read_fifo[chan].data);
+		bd->read_fifo[chan].data = NULL;
+	}
+
+	rshim_fifo_reset(bd);
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+/**
+ * rshim_fifo_write() - write bytes to one TmFifo channel.
+ * @bd:        the rshim backend
+ * @buffer:    source buffer (user or kernel space, per @from_user)
+ * @count:     number of bytes to write
+ * @chan:      channel index (TMFIFO_NET_CHAN or TMFIFO_CONS_CHAN)
+ * @nonblock:  if true, return -EAGAIN instead of sleeping when full
+ * @from_user: if true, @buffer is a user-space pointer
+ *
+ * Loops until @count bytes are queued, blocking (unless @nonblock)
+ * while the channel FIFO is full, and kicking the output path after
+ * each chunk.  Returns the number of bytes written so far if any were
+ * transferred before an error, otherwise a negative errno.
+ */
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		/* pass1 fills to the end of the ring; pass2 wraps around. */
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+/*
+ * Flush one TmFifo channel: wait until its write FIFO has drained,
+ * then wait for any outstanding low-level write to finish.  The
+ * @start/@end/@datasync arguments of the fsync interface are unused.
+ * Returns 0 on success or -ERESTARTSYS if interrupted by a signal.
+ */
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * poll() backend for one TmFifo channel: reports readable/writable
+ * state, and POLLERR on a FIFO error for the non-console channels
+ * (see the comment below for why the console is exempt).
+ */
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+
+	if (!read_empty(bd, chan))
+		retval |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		retval |= POLLOUT | POLLWRNORM;
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		retval |= POLLERR;
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, retval);
+
+	return retval;
+}
+
+
+/*
+ * Common release path for the TM FIFO and console device files.
+ *
+ * For the console it decrements the open count and, on the last close,
+ * clears the CONS_OPEN flag and discards buffered console data.  When
+ * neither the TM FIFO nor the console remains open, outstanding reads
+ * are cancelled.  Finally the device/module references taken at open
+ * time are dropped.
+ */
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	/* Last user gone on both channels: cancel pending reads. */
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* Drop the references taken when the file was opened. */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+/* read() for the TM FIFO device: network channel, user-space buffer. */
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_read(file->private_data, user_buffer, count,
+			       TMFIFO_NET_CHAN, nonblock, true);
+}
+
+/* write() for the TM FIFO device: network channel, user-space buffer. */
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_write(file->private_data, user_buffer, count,
+				TMFIFO_NET_CHAN, nonblock, true);
+}
+
+/* fsync() for the TM FIFO device: flush the network channel. */
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	const int chan = TMFIFO_NET_CHAN;
+
+	return rshim_fifo_fsync(file, start, end, datasync, chan);
+}
+
+/* poll() for the TM FIFO device: network channel state. */
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+
+	return mask;
+}
+
+/* release() for the TM FIFO device: close the network channel. */
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	const int chan = TMFIFO_NET_CHAN;
+
+	return rshim_fifo_release(inode, file, chan);
+}
+
+/* File operations for the TM FIFO (network channel) device. */
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+/*
+ * Open handler for the TM FIFO device file.  Only one open at a time
+ * is allowed (-EBUSY otherwise); on success the input drainer is run
+ * once to pick up any data already pending.
+ */
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_tm_open) {
+		pr_debug("tmfifo_open: file already open\n");
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	bd->is_tm_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	/* Call the drainer to do an initial read, if needed. */
+	rshim_fifo_input(bd);
+
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Console file operations routines */
+
+/*
+ * Delayed-work handler: services keepalive writes, deferred boot-FIFO
+ * writes, deferred FIFO writes, and console FIFO input/output.  It
+ * reschedules itself via a timer while the console is open on backends
+ * without reprobe support.
+ */
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	/* Refresh the keepalive scratchpad if requested. */
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	/* Complete a deferred boot-FIFO write, if one is queued. */
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	/* Skip FIFO/console work while the chip is being booted. */
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		/*
+		 * NOTE(review): this path uses plain spin_lock() while the
+		 * other users of bd->spinlock in this file take it with
+		 * spin_lock_irq(); confirm this cannot race an IRQ-context
+		 * user of the lock.
+		 */
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	/* No reprobe path: poll the console every 100ms via the timer. */
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+/* read() for the console device: console channel, user-space buffer. */
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_read(file->private_data, user_buffer, count,
+			       TMFIFO_CONS_CHAN, nonblock, true);
+}
+
+/* write() for the console device: console channel, user-space buffer. */
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_write(file->private_data, user_buffer, count,
+				TMFIFO_CONS_CHAN, nonblock, true);
+}
+
+/* fsync() for the console device: flush the console channel. */
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	const int chan = TMFIFO_CONS_CHAN;
+
+	return rshim_fifo_fsync(file, start, end, datasync, chan);
+}
+
+/*
+ * ioctl handler for the console device.  Supports the termios get/set
+ * commands (TCGETS, TCSETS, TCSETSW, TCSETSF) against the cached
+ * bd->cons_termios; everything else returns -EINVAL.  The TCGETS2
+ * conditional selects the termios conversion helper matching the
+ * architecture's termios layout.
+ */
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+/* poll() for the console device: console channel state. */
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+
+	return mask;
+}
+
+/* release() for the console device: close the console channel. */
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	const int chan = TMFIFO_CONS_CHAN;
+
+	return rshim_fifo_release(inode, file, chan);
+}
+
+/* File operations for the console device (includes termios ioctls). */
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+/*
+ * Open handler for the console device file.  Multiple opens are
+ * allowed and reference-counted; the first open sets the CONS_OPEN
+ * flag and schedules the worker to start console I/O.
+ */
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * The console is already open.  This is OK, but it means
+		 * there's no work to do other than updating the reference
+		 * count.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	bd->is_cons_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+
+	spin_unlock_irq(&bd->spinlock);
+
+	/* First open: kick the worker to start servicing the console. */
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * Called when the target is (re)attached with both the rshim and the
+ * TmFifo present.
+ *
+ * Clears any recorded TmFifo error, completes booting_complete (and
+ * reset_complete when reset serialization is enabled) for waiters, and
+ * kicks the console worker if the console device is already open.
+ *
+ * Always returns 0.
+ */
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		}	/* dropped stray ';' that followed this brace */
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+/*
+ * Read handler for the rshim register sub-device.
+ *
+ * Reads exactly one 8-byte, 8-byte-aligned register.  The file position
+ * encodes the target: bits 16-19 select the channel, bits 0-15 the
+ * register address.  Returns 8 on success or a negative error code.
+ */
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->read_rshim(bd,
+				(*ppos >> 16) & 0xF, /* channel # */
+				*ppos & 0xFFFF,	 /* addr */
+				&buf);
+	mutex_unlock(&bd->mutex);
+
+	/* If the read was successful, copy the data to userspace */
+	if (!retval && copy_to_user(user_buffer, &buf, count))
+		return -EFAULT;
+
+	return retval ? retval : count;
+}
+
+/*
+ * Write handler for the rshim register sub-device.
+ *
+ * Writes exactly one 8-byte, 8-byte-aligned register; the file position
+ * encodes channel (bits 16-19) and register address (bits 0-15), the
+ * same layout as rshim_rshim_read().  Returns 8 or a negative error.
+ */
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(&buf, user_buffer, count))
+		return -EFAULT;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->write_rshim(bd,
+				 (*ppos >> 16) & 0xF, /* channel # */
+				 *ppos & 0xFFFF, /* addr */
+				 buf);
+	mutex_unlock(&bd->mutex);
+
+	return retval ? retval : count;
+}
+
+/*
+ * Release handler for the rshim register sub-device: drop the device
+ * kref and the backend module reference taken in rshim_open().  The
+ * owner pointer is sampled before kref_put() since the put may destroy bd.
+ */
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations for the rshim register sub-device (installed by rshim_rshim_open). */
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+/* Open handler for the rshim register sub-device: just install its fops. */
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+/*
+ * seq_file show callback for the misc sub-device: prints the current
+ * boot mode (read from RSH_BOOT_CONTROL), a constant SW_RESET flag,
+ * and the backend driver name.
+ */
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	int retval;
+	u64 value;
+
+	/* Boot mode. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				&value);
+	if (retval) {
+		pr_err("couldn't read rshim register\n");
+		return retval;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+/*
+ * Write handler for the misc sub-device.
+ *
+ * Accepts a "KEY VALUE" pair: "BOOT_MODE <hex>" sets the boot mode in
+ * RSH_BOOT_CONTROL; "SW_RESET <nonzero-hex>" resets the chip, manually
+ * detaching / re-attaching the TmFifo around the reset when the backend
+ * has no reprobe support.  Returns 'count' on success or a negative
+ * error code.
+ */
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+
+	/*
+	 * NUL-terminate before parsing: copy_from_user() doesn't, and
+	 * sscanf() would otherwise run into uninitialized stack bytes.
+	 */
+	buf[count] = '\0';
+
+	/* Bound %s to sizeof(key)-1 so a long token can't overflow 'key'. */
+	if (sscanf(buf, "%31s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+/*
+ * Release handler for the misc sub-device: tear down the seq_file state,
+ * then drop the device kref and module reference taken in rshim_open().
+ */
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations for the misc sub-device; reads go through seq_file. */
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+/*
+ * Open handler for the misc sub-device: install the misc fops and turn
+ * the file into a seq_file showing rshim_misc_seq_show(), passing the
+ * backend pointer through as the seq_file private data.
+ */
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+/*
+ * Common open handler for all rshim character devices.
+ *
+ * Decodes the minor number into a backend index (minor / RSH_DEV_TYPES)
+ * and a sub-device type (minor % RSH_DEV_TYPES), takes a module
+ * reference and a kref on the backend, then dispatches to the
+ * type-specific open routine, which installs the real fops.  Both
+ * references are dropped if the type-specific open fails.
+ */
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+/* Initial fops for every minor; rshim_open() swaps in the per-type fops. */
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+/*
+ * Push synchronization words into the host-to-tile TmFifo.
+ *
+ * Reads the FIFO capacity and current fill level, then writes up to the
+ * available space (capped at TMFIFO_MAX_SYNC_WORDS) of zero-length
+ * VIRTIO_ID_NET header words.
+ *
+ * Returns 0 on success or the negative error from a register access.
+ */
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		/*
+		 * NOTE(review): this writes the sync words to the STS
+		 * register; confirm the FIFO DATA register isn't intended.
+		 */
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_STS, hdr.data);
+		if (retval < 0) {
+			/* Propagate the failure instead of returning 0. */
+			pr_err("write_rshim error %d\n", retval);
+			return retval;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Central event dispatcher for a backend.
+ *
+ * FIFO events are forwarded to the FIFO layer.  ATTACH completes boot
+ * handling, optionally re-syncs the TmFifo, creates every registered
+ * service on this backend (under rcu_read_lock), then kicks FIFO input.
+ * DETACH deletes every service (outside the RCU read section, using the
+ * per-service ref counter) and clears bd->dev.
+ *
+ * May be called from interrupt context for FIFO events; returns 0 or
+ * the first service-creation error.
+ */
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+/*
+ * Pick a device-table slot for the given device name.
+ *
+ * Preference order: (1) a slot previously used by the same name,
+ * (2) a slot that has never been used, (3) any slot whose device is
+ * currently gone.  Returns the slot index, or -1 if none is available.
+ */
+static int rshim_find_index(char *dev_name)
+{
+	int slot;
+
+	/* First look for a match with a previous device name. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (rshim_dev_names[slot] &&
+		    !strcmp(dev_name, rshim_dev_names[slot])) {
+			pr_debug("found match with previous at index %d\n",
+				 slot);
+			return slot;
+		}
+	}
+
+	/* Then look for a never-used slot. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (!rshim_dev_names[slot]) {
+			pr_debug("found never-used slot %d\n", slot);
+			return slot;
+		}
+	}
+
+	/* Finally look for a currently-unused slot. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (!rshim_devs[slot]) {
+			pr_debug("found unused slot %d\n", slot);
+			return slot;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Look up the backend occupying the slot chosen for 'dev_name'.
+ * Returns NULL if no slot is available; may also return NULL when the
+ * chosen slot exists but currently has no backend.
+ */
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int dev_index = rshim_find_index(dev_name);
+
+	/* If none of that worked, we fail. */
+	if (dev_index < 0) {
+		pr_err("couldn't find slot for new device %s\n", dev_name);
+		return NULL;
+	}
+
+	return rshim_devs[dev_index];
+}
+EXPORT_SYMBOL(rshim_find);
+
+/* House-keeping timer. */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+/* sysfs "rshim_path" attribute: shows the backend's device name. */
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	if (bd == NULL)
+		return -ENODEV;
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			rshim_dev_names[bd->dev_index]);
+}
+
+/* Read-only (0444) sysfs attribute, created in rshim_register(). */
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+/* Deferred work: request the service modules once a backend registers. */
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+/* Scheduled (with delay) from rshim_register(). */
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/*
+ * Check whether backend is allowed to register or not.
+ *
+ * Writes 0 to RSH_SCRATCHPAD1, then polls it for up to one second: if
+ * it gets rewritten to the keepalive magic, another (active) backend
+ * driver owns the target.  Returns 0 when registration may proceed,
+ * -EEXIST when another backend is attached, -ENODEV on access failure.
+ */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	int i, retval;
+	u64 value;
+
+	/* Write value 0 to RSH_SCRATCHPAD1. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (retval < 0)
+		return -ENODEV;
+
+	/*
+	 * Poll RSH_SCRATCHPAD1 up to one second to check whether it's reset to
+	 * the keepalive magic value, which indicates another backend driver has
+	 * already attached to this target.
+	 */
+	for (i = 0; i < 10; i++) {
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+					&value);
+		if (retval < 0)
+			return -ENODEV;
+
+		if (value == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+/*
+ * Register a backend with the rshim framework.
+ *
+ * Validates the backend's register accessors, verifies no other backend
+ * owns the target, initializes all synchronization state, claims a
+ * device-table slot, creates the per-type char devices and sysfs
+ * attributes, allocates the boot buffers, and starts the keepalive
+ * timer.  Returns 0 on success or a negative error code; a partial
+ * char-device setup is tolerated (logged, not rolled back).
+ */
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	/* If a specific backend driver was requested, accept only it. */
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	bd->dev_index = dev_index;
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+		} else {
+			pr_debug("added class dev %s\n",
+				 format_dev_t(devbuf, rshim_dev_base +
+					      bd->dev_index *
+					      RSH_DEV_TYPES + i));
+			/*
+			 * Only touch cl_dev on success: device_create()
+			 * returns an ERR_PTR (not NULL) on failure, so the
+			 * old unconditional dev_set_drvdata() dereferenced
+			 * an invalid pointer.
+			 */
+			dev_set_drvdata(cl_dev, bd);
+			if (device_create_file(cl_dev, &dev_attr_rshim_path))
+				pr_err("could not create rshim_path file in sysfs\n");
+		}
+	}
+
+	/*
+	 * Allocate the boot-buffer pair atomically: either both pointers
+	 * are valid or both are NULL.  (The previous loop could leave
+	 * boot_buf[1] allocated while boot_buf[0] was NULL.)
+	 */
+	bd->boot_buf[0] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+	bd->boot_buf[1] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+	if (!bd->boot_buf[0] || !bd->boot_buf[1]) {
+		kfree(bd->boot_buf[0]);
+		kfree(bd->boot_buf[1]);
+		bd->boot_buf[0] = NULL;
+		bd->boot_buf[1] = NULL;
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	/* Kick off deferred loading of the service modules. */
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+
+/*
+ * Deregister a backend: stop the keepalive timer, free the boot
+ * buffers, remove all char devices and class devices, and release the
+ * device-table slot.  No-op if the backend was never registered.
+ */
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the timer. */
+	del_timer_sync(&bd->timer);
+
+	for (i = 0; i < 2; i++)
+		kfree(bd->boot_buf[i]);
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class,
+			       rshim_dev_base + bd->dev_index *
+			       RSH_DEV_TYPES + i);
+	}
+
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+/*
+ * Register a service (e.g. networking) with the framework.
+ *
+ * A private copy of the descriptor is published through the RCU-protected
+ * rshim_svc[] table, then the service is created on every existing
+ * backend.  Returns 0 on success, -EEXIST if a service of this type is
+ * already registered, -ENOMEM or the service's create() error otherwise;
+ * on failure (other than -EEXIST) the partial registration is undone.
+ */
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add memory barrier to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_unlock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+/*
+ * Deregister a service: delete it from every backend, unpublish the
+ * rshim_svc[] pointer, wait for all RCU readers and in-flight users
+ * (tracked by svc->ref) to drain, then free the private copy.
+ */
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+/*
+ * Module initialization: create the "rsh" device class, reserve the
+ * char-device number region, allocate the global device tables, and
+ * create the worker queue.  Partially-acquired resources are released
+ * on the error path.
+ */
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	/*
+	 * Use kcalloc for both tables: zero-initialized and
+	 * overflow-checked, and consistent with rshim_devs below.
+	 */
+	rshim_dev_names = kcalloc(rshim_nr_devs, sizeof(rshim_dev_names[0]),
+				  GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+/*
+ * Module teardown: flush the deferred module-load work, release the
+ * char-device region and class, destroy the worker queue, and free the
+ * service table entries, device names, and global tables.
+ */
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	/* Free service copies; services should already be deregistered. */
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types. */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX
+};
+
+/*
+ * TMFIFO message header, exchanged as one 64-bit FIFO word.  Note the
+ * payload length is big-endian (__be16); 'data' provides the raw u64
+ * view written to / read from the FIFO registers.
+ */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length (big-endian) */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;			/* raw 64-bit view of the header */
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/* FIFO structure: ring buffer plus a wait queue for blocking readers/writers. */
+struct rshim_fifo {
+	unsigned char *data;		/* backing buffer */
+	unsigned int head;		/* ring head index */
+	unsigned int tail;		/* ring tail index */
+	wait_queue_head_t operable;	/* woken when the FIFO becomes usable */
+};
+
+/*
+ * RShim backend: per-target state shared by all sub-devices, plus the
+ * accessor callbacks supplied by the transport backend (USB, PCIe, ...).
+ */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Pointer to the net device. */
+	void *net;
+
+	/* House-keeping Timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 */
+	struct kref kref;
+
+	/* State flags. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+#ifdef RSH_RESET_MUTEX
+	/*
+	 * NOTE(review): the .c file guards uses of this field with
+	 * "#if RSH_RESET_MUTEX" while this header uses "#ifdef"; a
+	 * future "#define RSH_RESET_MUTEX 0" would diverge — make the
+	 * two tests consistent.
+	 */
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/*
+ * RShim service descriptor (e.g. networking).  Registered via
+ * rshim_register_service(); a private copy is published through the
+ * RCU-protected rshim_svc[] table.
+ */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/* Reference number (in-flight users; see rshim_notify/deregister). */
+	atomic_t ref;
+
+	/* Create service. */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global array to store RShim devices and names. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..b14df716
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+/*
+ * Register offsets and field definitions (SHIFT/WIDTH/MASK triplets)
+ * for the BlueField RShim interface: boot FIFO, TM FIFOs, reset
+ * control, scratchpads and byte-access registers.
+ */
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#define _64bit(x) x ## ULL
+#endif /* __ASSEMBLER__ */
+
+#include <linux/types.h>
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__RSHIM_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC via an
interface called Rshim. The Rshim driver provides boot, console,
and networking services over this interface. This commit adds
the common driver that the backend (transport) drivers will
use.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig           |    8 +
 drivers/soc/mellanox/Makefile          |    1 +
 drivers/soc/mellanox/host/Makefile     |    2 +
 drivers/soc/mellanox/host/rshim.c      | 2673 ++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h      |  361 +++++
 drivers/soc/mellanox/host/rshim_regs.h |  152 ++
 6 files changed, 3197 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellanox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..e47842f
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+/*
+ * Name of the backend (transport) driver to bind to; the default empty
+ * string presumably means "any backend" — TODO confirm against the
+ * backend drivers.
+ */
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+/* Keepalive period in milliseconds; writable at runtime (0644). */
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+/*
+ * Head/tail circular-buffer helpers built on the CIRC_* macros from
+ * <linux/circ_buf.h>.  The stored head/tail values are kept masked, so
+ * READ_FIFO_SIZE and WRITE_FIFO_SIZE must be powers of two (the macros
+ * mask with SIZE - 1).
+ */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/*
+ * Tile is done mask.  NOTE(review): this is an exact duplicate of the
+ * definition above; identical redefinition is legal C, but the second
+ * copy could be dropped.
+ */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex protecting the device table and service registry. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+/* Acquire the driver-wide rshim_mutex. */
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+/* Release the driver-wide rshim_mutex. */
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ *
+ * Returns the number of bytes copied into @buf; 0 if the boot file is
+ * currently open or the FIFO is empty.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			/* Refresh the count of words pending in the FIFO. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * FIFO data words are little endian on the wire; convert
+		 * to CPU byte order before handing them to the caller.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ *
+ * Returns the number of payload bytes consumed (padding excluded), or a
+ * negative error code if nothing could be written.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		/* Silently drop TMFIFO traffic while booting. */
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		/*
+		 * Bail out if the FIFO status could not be read; otherwise
+		 * we would fall through and push data into the FIFO without
+		 * knowing whether there is room for it.
+		 */
+		if (retval < 0)
+			return byte_cnt ? byte_cnt : retval;
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+/*
+ * Queue a write to the RShim backend.
+ *
+ * Must be called with bd->mutex held; the BOOT path temporarily drops
+ * and re-acquires it while waiting for the worker to complete.  The
+ * NET/CONSOLE path is asynchronous and returns 0 immediately after
+ * handing the buffer to the worker.
+ */
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		/* Silently drop TMFIFO traffic while booting. */
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		/* Drop the mutex while the worker consumes the buffer. */
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	/* Backends without re-probe never need to wait. */
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;
+			mutex_lock(&bd->mutex);
+		}
+		/* Booting finished but the rshim never came back. */
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * write() handler for the boot device: stream a boot image from user
+ * space into the boot FIFO in BOOT_BUF_SIZE chunks (double-buffered).
+ * The byte count is clipped down to a multiple of 8; if the original
+ * count was unaligned, -EINVAL is returned after the aligned portion
+ * has been sent.
+ */
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		char *buf = bd->boot_buf[whichbuf];
+
+		/* Alternate between the two staging buffers. */
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		/* Short or failed write: stop (negative retval lands here). */
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/*
+ * release() handler for the boot device: restore eMMC boot mode, clear
+ * the boot-open flag, kick the worker, and drop the references taken
+ * at open time.
+ */
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	/* Delayed by HZ, presumably to let the chip settle — TODO confirm. */
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations installed on the boot device by rshim_boot_open(). */
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+/*
+ * open() handler for the boot device: flush all FIFO state, switch the
+ * chip to RShim (external) boot mode and trigger a SW reset so a boot
+ * stream can be pushed via rshim_boot_write().
+ */
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have some sort of bug which makes
+	     * it return more bytes than we actually wrote!  In that case
+	     * we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+		/* NOTE(review): cleared after dropping bd->mutex — confirm. */
+		bd->is_boot_open = 0;
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	/*
+	 * Release the other devices' mutexes.  devs_locked is an
+	 * unsigned long, so use __builtin_ctzl (not __builtin_ctz, which
+	 * operates on int and would mishandle bits >= 32 on 64-bit).
+	 */
+	while (devs_locked) {
+		int i = __builtin_ctzl(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Record an error on the FIFO and wake every waiter (writers waiting on
+ * write_completed and readers/writers blocked on each channel's wait
+ * queue) so they can observe it.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int chan;
+
+	bd->tmfifo_error = err;
+	wake_up_interruptible_all(&bd->write_completed);
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		wake_up_interruptible_all(&bd->read_fifo[chan].operable);
+		wake_up_interruptible_all(&bd->write_fifo[chan].operable);
+	}
+}
+
+/* Drain the read buffer, and start another read/interrupt if needed. */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	/* While the boot stream is open, the FIFO data isn't ours to drain. */
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			/* Packet length is header length plus payload. */
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			/* Packets are padded to an 8-byte boundary. */
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		/* Copy no more than the packet, the buffer, or the FIFO has. */
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		/* Packet fully consumed; skip its trailing padding. */
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with RCU lock. See comments in
+			 * rshim_register_service() / rshim_deregister_service()
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+/*
+ * rshim_fifo_read() - copy data from a channel's read FIFO to a buffer.
+ * @bd: backend device
+ * @buffer: destination buffer (user or kernel pointer, per @to_user)
+ * @count: number of bytes requested
+ * @chan: TMFIFO channel index
+ * @nonblock: if true, return -EAGAIN instead of sleeping on an empty FIFO
+ * @to_user: if true, @buffer is a userspace pointer
+ *
+ * Returns the number of bytes transferred, or a negative error code if
+ * nothing was transferred.  Matching rshim_fifo_write(), a partial count
+ * is returned when a signal interrupts the wait.
+ */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				/*
+				 * Like the write path, return the partial
+				 * count if any data was already copied.
+				 */
+				return rd_cnt ? rd_cnt :
+					(to_user ? -EINTR : -ERESTARTSYS);
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		/* pass1 runs to the end of the ring; pass2 wraps around. */
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+/* Gather pending channel data into the write buffer and send it. */
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				/* Wait until a full header has arrived. */
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				/*
+				 * Point at the header in place if it is
+				 * contiguous; otherwise reassemble it from
+				 * the two wrapped pieces of the ring.
+				 */
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			/* pass1 to the ring's end, pass2 after the wrap. */
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+/*
+ * rshim_fifo_alloc() - allocate the per-channel read/write FIFO buffers.
+ * @bd: backend device
+ *
+ * Buffers that already exist are kept.  Returns nonzero if any allocation
+ * failed (partial allocations are left in place for rshim_fifo_free()).
+ */
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= !bd->read_fifo[i].data;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= !bd->write_fifo[i].data;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+/* Return all FIFO state to its post-probe condition. */
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int chan;
+
+	/* Clear the staging-buffer bookkeeping. */
+	bd->read_buf_bytes = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+	bd->rx_chan = 0;
+	bd->tx_chan = 0;
+
+	/* Drop in-flight I/O flags and empty every channel's rings. */
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_WRITING |
+			    RSH_SFLG_READING);
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		read_reset(bd, chan);
+		write_reset(bd, chan);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+/* Free all per-channel buffers and mark the TM FIFO as gone. */
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int chan;
+
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		kfree(bd->read_fifo[chan].data);
+		kfree(bd->write_fifo[chan].data);
+		bd->read_fifo[chan].data = NULL;
+		bd->write_fifo[chan].data = NULL;
+	}
+
+	rshim_fifo_reset(bd);
+
+	/* Nothing can use the TM FIFO from here on. */
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+/*
+ * Copy @count bytes from @buffer into the channel's write FIFO, sleeping
+ * for space unless @nonblock.  Returns bytes written, or a negative error
+ * if nothing was written.  @from_user selects copy_from_user() vs memcpy().
+ */
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		/* pass1 runs to the end of the ring; pass2 wraps around. */
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+/*
+ * Block until all queued data for @chan has left the driver.
+ * @start/@end/@datasync are accepted for the fsync signature but unused.
+ */
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Report readiness of @chan's read/write FIFOs for poll/select. */
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int mask = 0;
+
+	mutex_lock(&bd->mutex);
+
+	/* Register both directions so either edge wakes the poller. */
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+
+	if (!read_empty(bd, chan))
+		mask |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		mask |= POLLOUT | POLLWRNORM;
+
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		mask |= POLLERR;
+
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, mask);
+
+	return mask;
+}
+
+
+/* Common release path for the console and network FIFO devices. */
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	/* Last user gone: cancel pending reads and clear the flag. */
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* Drop the device reference taken at open time. */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+/* Userspace read of the network FIFO channel. */
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+/* Userspace write to the network FIFO channel. */
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+/* Flush pending network-channel output. */
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_NET_CHAN);
+}
+
+/* poll/select support for the network FIFO channel. */
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+}
+
+/* Release the network FIFO device. */
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_NET_CHAN);
+}
+
+/* File operations installed by rshim_tmfifo_open(). */
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+/* Open the network FIFO device; only one opener is permitted. */
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	/* Route all further file operations to the tmfifo fops. */
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_tm_open) {
+		pr_debug("tmfifo_open: file already open\n");
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	bd->is_tm_open = 1;
+
+	/* Kick the drainer once in case data is already pending. */
+	spin_lock_irq(&bd->spinlock);
+	rshim_fifo_input(bd);
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Console file operations routines */
+
+/*
+ * Deferred-work handler: sends keepalives, performs delayed boot and FIFO
+ * writes, and drives console FIFO input/output.  Re-arms bd->timer while
+ * the console is open on backends without reprobe support.
+ */
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	/* Acknowledge the keepalive request from the target. */
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	/* Complete a boot-stream write that was deferred to this context. */
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	/* While booting, the FIFO paths below must stay idle. */
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		/*
+		 * NOTE(review): plain spin_lock() here while other paths use
+		 * spin_lock_irq() on bd->spinlock — confirm this context can
+		 * never race with the IRQ-disabled users.
+		 */
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	/* Poll the console periodically when there is no reprobe support. */
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+/* Userspace read of the console FIFO channel. */
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+/* Userspace write to the console FIFO channel. */
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+/* Flush pending console-channel output. */
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_CONS_CHAN);
+}
+
+/*
+ * Minimal termios ioctl support so terminal programs can get/set the
+ * console's cached termios state.  The TCGETS2 ifdef selects the helper
+ * matching the architecture's termios layout.
+ */
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+/* poll/select support for the console FIFO channel. */
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+}
+
+/* Release the console FIFO device. */
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_CONS_CHAN);
+}
+
+/* File operations installed by rshim_console_open(). */
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+/* Open the console FIFO device; multiple openers share one stream. */
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	/* Route all further file operations to the console fops. */
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * Someone already has the console open; just bump the
+		 * reference count and we're done.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	bd->is_cons_open = 1;
+
+	/* Tell the input drainer that console data is now wanted. */
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Schedule the worker to start moving console data. */
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * Called once both the rshim registers and the TM FIFO are usable after
+ * boot: clear stale errors, signal waiters, and restart the console
+ * worker if needed.  Always returns 0.
+ */
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		}
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+/*
+ * Read one 64-bit rshim register.  The file offset encodes the target:
+ * bits 16-19 select the channel, bits 0-15 the register address.
+ */
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->read_rshim(bd,
+				(*ppos >> 16) & 0xF, /* channel # */
+				*ppos & 0xFFFF,	 /* addr */
+				&buf);
+	mutex_unlock(&bd->mutex);
+
+	/* If the read was successful, copy the data to userspace */
+	if (!retval && copy_to_user(user_buffer, &buf, count))
+		return -EFAULT;
+
+	return retval ? retval : count;
+}
+
+/*
+ * Write one 64-bit rshim register.  The file offset encodes the target:
+ * bits 16-19 select the channel, bits 0-15 the register address.
+ */
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(&buf, user_buffer, count))
+		return -EFAULT;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->write_rshim(bd,
+				 (*ppos >> 16) & 0xF, /* channel # */
+				 *ppos & 0xFFFF, /* addr */
+				 buf);
+	mutex_unlock(&bd->mutex);
+
+	return retval ? retval : count;
+}
+
+/* Release the register-access device: drop the open-time reference. */
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations installed by rshim_rshim_open(). */
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+/* Open the register-access device: just switch the fops. */
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+/* seq_file show routine: print boot mode, reset flag and driver name. */
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	int retval;
+	u64 value;
+
+	/* Boot mode. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				&value);
+	if (retval) {
+		pr_err("couldn't read rshim register\n");
+		return retval;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+/*
+ * Parse "KEY VALUE" commands written to the misc device.  Supported keys:
+ * "BOOT_MODE <hex>" sets the boot-control mode; "SW_RESET <hex>" resets
+ * the chip when nonzero (detaching/re-attaching the TmFifo when there is
+ * no reprobe support).
+ */
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+
+	/* User data is not NUL-terminated; terminate it before parsing. */
+	buf[count] = '\0';
+
+	/* Bound %s so a long key cannot overflow key[]. */
+	if (sscanf(buf, "%31s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+/* Release the misc device: tear down the seq file, then drop our ref. */
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	/* Drop the device reference taken at open time. */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations installed by rshim_misc_open(); reads go via seq_file. */
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+/* Open the misc device as a single-shot seq file showing device status. */
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+/*
+ * Common open entry point for every rshim character device.  Resolves
+ * the backend from the minor number, pins it (module ref + kref), then
+ * dispatches to the sub-device-specific open routine.  Both references
+ * are dropped again if that routine fails.
+ */
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	/* Minor numbers are grouped RSH_DEV_TYPES per backend. */
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	/* Dispatch on sub-device type (boot, rshim, console, net, misc). */
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		/* Snapshot owner first; kref_put() may free 'bd'. */
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+/*
+ * Default fops installed on every rshim cdev.  Only .open is provided;
+ * the per-sub-device open routines install their own file_operations on
+ * the file (see rshim_misc_open() for the misc device's table).
+ */
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+/*
+ * Sync up the host-to-tile TmFifo by filling its free space with
+ * zero-length VIRTIO_ID_NET message headers.  Called on attach when the
+ * backend has no reprobe support (see rshim_notify()).
+ *
+ * Returns 0 on success or a negative error from read_rshim/write_rshim.
+ */
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	/*
+	 * Push the sync words into the FIFO *data* register.  The previous
+	 * code wrote them to RSH_TM_HOST_TO_TILE_STS, the occupancy-count
+	 * status register, so they never entered the FIFO.  Write failures
+	 * are now propagated instead of being silently dropped.
+	 */
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_DATA, hdr.data);
+		if (retval < 0)
+			return retval;
+	}
+
+	return 0;
+}
+
+/*
+ * Dispatch a backend event (RSH_EVENT_xxx).
+ *
+ * FIFO events may originate from interrupt context, which is why the
+ * rshim_svc[] service table is protected by RCU rather than a mutex
+ * (see rshim_register_service()/rshim_deregister_service()).
+ *
+ * Returns 0, or the first attach error from a service's create() hook.
+ */
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		/* Attach every registered service to this backend. */
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;	/* already attached is OK */
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		/* Kick input processing for anything already queued. */
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+/*
+ * Pick a slot in rshim_devs[] for the named device, in three passes of
+ * decreasing preference: (1) the slot a device of the same name used
+ * before, (2) a slot never used by any device, (3) any slot with no
+ * active device.  Returns the slot index, or -1 if all are taken.
+ */
+static int rshim_find_index(char *dev_name)
+{
+	int slot;
+
+	/* Pass 1: reuse the slot of a previously-seen device name. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (rshim_dev_names[slot] &&
+		    !strcmp(dev_name, rshim_dev_names[slot])) {
+			pr_debug("found match with previous@index %d\n", slot);
+			return slot;
+		}
+	}
+
+	/* Pass 2: take a slot that has never held any device. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (!rshim_dev_names[slot]) {
+			pr_debug("found never-used slot %d\n", slot);
+			return slot;
+		}
+	}
+
+	/* Pass 3: fall back to a slot whose device is currently gone. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (!rshim_devs[slot]) {
+			pr_debug("found unused slot %d\n", slot);
+			return slot;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Look up the backend registered under 'dev_name', or NULL if no slot
+ * can be found for it.  Note a valid slot may still hold NULL if the
+ * device is not currently present.
+ */
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int idx = rshim_find_index(dev_name);
+
+	if (idx >= 0)
+		return rshim_devs[idx];
+
+	/* If none of that worked, we fail. */
+	pr_err("couldn't find slot for new device %s\n", dev_name);
+	return NULL;
+}
+EXPORT_SYMBOL(rshim_find);
+
+/*
+ * House-keeping timer.  Flushes pending console work to the worker and
+ * periodically requests a keepalive update; rearms itself every
+ * 'rshim_keepalive_period' ms for the lifetime of the backend (stopped
+ * by del_timer_sync() in rshim_deregister()).
+ */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	/* Let the worker drain any pending console output. */
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+/*
+ * sysfs "rshim_path" attribute: reports the backend device name
+ * associated with this class device (set via dev_set_drvdata() in
+ * rshim_register()).
+ */
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	if (bd == NULL)
+		return -ENODEV;
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			rshim_dev_names[bd->dev_index]);
+}
+
+/* Read-only (0444) attribute created on every rshim class device. */
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+/*
+ * Deferred loader for dependent service modules; scheduled with a delay
+ * from rshim_register() so the core module is fully up first.
+ */
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/*
+ * Check whether this backend is allowed to register.  Clears the
+ * scratchpad and then watches it for up to a second: if another backend
+ * is alive it will rewrite the keepalive magic, in which case we refuse
+ * with -EEXIST.  Register I/O failures yield -ENODEV.
+ */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	u64 sp1;
+	int attempt, rc;
+
+	/* Clear RSH_SCRATCHPAD1 so any live peer has to rewrite it. */
+	rc = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (rc < 0)
+		return -ENODEV;
+
+	/* Poll 10 times at 100ms: roughly one second total. */
+	for (attempt = 0; attempt < 10; attempt++) {
+		rc = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, &sp1);
+		if (rc < 0)
+			return -ENODEV;
+
+		if (sp1 == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+/*
+ * Register a backend: claim a device slot, initialize synchronization
+ * primitives and FIFO wait queues, create the per-type character and
+ * class devices, and start the house-keeping timer.
+ *
+ * Returns 0 on success; -EACCES if another backend driver was selected
+ * via the module parameter, -ENODEV if no slot is free or the target is
+ * unreachable, -EEXIST if another host is attached, -EINVAL if the
+ * backend lacks the mandatory register accessors.
+ */
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	/* Honor the backend_driver= restriction, if configured. */
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	/* Fall back to the default FIFO-based I/O if not provided. */
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	/* Claim the slot; free a stale name if the slot is being reused. */
+	bd->dev_index = dev_index;
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+			/*
+			 * Skip drvdata/sysfs setup: 'cl_dev' is an error
+			 * pointer here and must not be dereferenced (the
+			 * original code went on to use it anyway).
+			 */
+			continue;
+		}
+		pr_debug("added class dev %s\n",
+			 format_dev_t(devbuf, rshim_dev_base +
+				      bd->dev_index *
+				      RSH_DEV_TYPES + i));
+
+		dev_set_drvdata(cl_dev, bd);
+		if (device_create_file(cl_dev, &dev_attr_rshim_path))
+			pr_err("could not create rshim_path file in sysfs\n");
+	}
+
+	/*
+	 * Allocate the two boot buffers as a pair: if either allocation
+	 * fails, release both so boot_buf[] is all-or-nothing.  (The
+	 * original code leaked boot_buf[1] when only the first allocation
+	 * failed.)
+	 */
+	for (i = 0; i < 2; i++)
+		bd->boot_buf[i] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+	if (!bd->boot_buf[0] || !bd->boot_buf[1]) {
+		for (i = 0; i < 2; i++) {
+			kfree(bd->boot_buf[i]);
+			bd->boot_buf[i] = NULL;
+		}
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	/* Load dependent service modules (e.g. rshim_net) shortly after. */
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+EXPORT_SYMBOL(rshim_register);
+
+/*
+ * Undo rshim_register(): stop the house-keeping timer, free the boot
+ * buffers, remove the character and class devices, and release the
+ * device slot.  The slot's name entry is kept so a reconnecting device
+ * can reclaim the same index.
+ */
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the timer. */
+	del_timer_sync(&bd->timer);
+
+	/* Release the pre-allocated boot buffers (may be NULL). */
+	kfree(bd->boot_buf[0]);
+	kfree(bd->boot_buf[1]);
+
+	/* Tear down the per-type character and class devices. */
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		dev_t devt = rshim_dev_base +
+			     bd->dev_index * RSH_DEV_TYPES + i;
+
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class, devt);
+	}
+
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+/*
+ * Register a service (e.g. the rshim networking driver) and attach it
+ * to all existing backends.  An internal copy of 'service' is published
+ * via RCU; the caller's structure is not retained.
+ *
+ * Returns 0, -EEXIST if a service of this type is already registered,
+ * -ENOMEM on allocation failure, or the first non--EEXIST error from
+ * the service's create() hook.
+ */
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add a memory barrier to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_unlock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+/*
+ * Deregister a service: detach it from every backend, unpublish the
+ * RCU-protected pointer, then free the internal copy made by
+ * rshim_register_service() once all readers and in-flight delete()
+ * calls (tracked by svc->ref, see rshim_notify()) have drained.
+ */
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		/* Wait for RCU readers of the old pointer to finish. */
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+/*
+ * Module init: create the "rsh" device class, reserve the char-device
+ * number range, allocate the per-slot tracking arrays, and create the
+ * work queue.  All steps are unwound on the error path.
+ */
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers: RSH_DEV_TYPES minors per slot. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	/*
+	 * Use kcalloc for both arrays: zero-initialized and with an
+	 * overflow-checked multiplication (the previous open-coded
+	 * kzalloc(n * size) for rshim_dev_names had neither check nor
+	 * consistency with the rshim_devs allocation).
+	 */
+	rshim_dev_names = kcalloc(rshim_nr_devs, sizeof(rshim_dev_names[0]),
+				  GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	/* rshim_dev_base is only nonzero after a successful allocation. */
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+/*
+ * Module exit: flush deferred module loading, then release the device
+ * numbers, class, work queue, any leftover service copies, and the
+ * per-slot tracking arrays allocated in rshim_init().
+ */
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	/* Let a pending request_module() work item finish first. */
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types. */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX
+};
+
+/*
+ * TMFIFO message header: one 64-bit FIFO word, viewable either as the
+ * packed type/len fields or as the raw u64 written to the data register
+ * (see rshim_tmfifo_sync()).
+ */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type (e.g. VIRTIO_ID_NET) */
+		__be16 len;		/* payload length, big-endian */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;			/* raw 64-bit FIFO word */
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/*
+ * FIFO structure: a byte buffer with head/tail indices and a wait queue
+ * for blocking readers/writers (initialized in rshim_register()).
+ */
+struct rshim_fifo {
+	unsigned char *data;
+	unsigned int head;
+	unsigned int tail;
+	wait_queue_head_t operable;	/* woken when the FIFO can make progress */
+};
+
+/* RShim backend. */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Pointer to the net device. */
+	void *net;
+
+	/* House-keeping Timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 */
+	struct kref kref;
+
+	/* State flags. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+#ifdef RSH_RESET_MUTEX
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/*
+ * RShim service: a pluggable consumer of the TmFifo (e.g. networking).
+ * Registered via rshim_register_service(); hooks are invoked from
+ * rshim_notify() on backend attach/detach.
+ */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/* Reference number (guards delete() against concurrent unload). */
+	atomic_t ref;
+
+	/* Create service.  May return -EEXIST if already attached. */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global array to store RShim devices and names. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..b14df716
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#define _64bit(x) x ## ULL
+#endif /* __ASSEMBLER */
+
+#include <linux/types.h>
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__RSHIM_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds networking support over the Rshim interface of
the BlueField SoC. It communicates with the target (ARM) side via
the Rshim TmFifo.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |   2 +-
 drivers/soc/mellanox/host/rshim_net.c | 834 ++++++++++++++++++++++++++++++++++
 2 files changed, 835 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index e47842f..1a282b9 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o
+obj-m := rshim.o rshim_net.o
 
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/* Flag to drain the current pending packet. */
+static bool rshim_net_draining_mode;
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the ring state. */
+struct rshim_net_vring {
+	void *va;			/* virtual address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types. */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;
+	struct rshim_backend *bd;		/* backend */
+	u8 status;
+	u16 virtio_registered : 1;
+	u64 features;
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/* Allocate vrings for the net device. */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the net device. */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i, size;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			kfree(vring->va);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+
+	/* Tx. */
+	if (test_and_clear_bit(RSH_NET_TX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_TX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, false);
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(RSH_NET_RX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_RX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, true);
+	}
+
+	/* Keepalive check. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/* Nothing to do for now. */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/* House-keeping timer. */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	idx = vr->used->idx % vring->size;
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/* Update the link activity. */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	if (activity) {
+		/* Bring up the link. */
+		if (!(net->config.status & VIRTIO_NET_S_LINK_UP)) {
+			net->config.status |= VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+		}
+	} else {
+		/* Bring down the link. */
+		if (net->config.status & VIRTIO_NET_S_LINK_UP) {
+			int i;
+
+			net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+
+			/* Reset the ring state. */
+			for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+				net->vrings[i].pkt_len =
+						sizeof(struct virtio_net_hdr);
+				net->vrings[i].cur_len = 0;
+				net->vrings[i].rem_len = 0;
+			}
+		}
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header. */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+
+	/*
+	 * Virtio-net maintains vrings in pairs. Odd number ring for Rx
+	 * and even number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX bit. */
+		if (!test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	} else {
+		/* Set the TX bit. */
+		if (!test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	}
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Get the array of feature bits for this device. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->features;
+}
+
+/* Confirm device features to use. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->status;
+}
+
+/* Write the status byte. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&net->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/* Remove. */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (net) {
+		/* Stop the timer. */
+		del_timer_sync(&net->timer);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&net->work);
+
+		/* Unregister virtio. */
+		if (net->virtio_registered)
+			unregister_virtio_device(&net->vdev);
+
+		/* Free vring. */
+		rshim_net_free_vrings(net);
+
+		kfree(net);
+	}
+
+	return 0;
+}
+
+/* Rx ready. */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (net) {
+		test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+		schedule_work(&net->work);
+	}
+}
+
+/* Remove. */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	int ret = 0;
+
+	if (bd->net) {
+		ret = rshim_net_delete_dev((struct rshim_net *)bd->net);
+		bd->net = NULL;
+	}
+
+	return ret;
+}
+
+/* Init. */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(struct rshim_net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	timer_setup(&net->timer, rshim_net_timer, 0);
+	net->timer.function = rshim_net_timer;
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+	if (rshim_net_alloc_vrings(net))
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	return rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds networking support over the Rshim interface of
the BlueField SoC. It communicates with the target (ARM) side via
the Rshim TmFifo.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |   2 +-
 drivers/soc/mellanox/host/rshim_net.c | 834 ++++++++++++++++++++++++++++++++++
 2 files changed, 835 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index e47842f..1a282b9 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o
+obj-m := rshim.o rshim_net.o
 
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/* Flag to drain the current pending packet. */
+static bool rshim_net_draining_mode;
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the ring state. */
+struct rshim_net_vring {
+	void *va;			/* virtual address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types. */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;
+	struct rshim_backend *bd;		/* backend */
+	u8 status;
+	u16 virtio_registered : 1;
+	u64 features;
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/* Allocate vrings for the net device. */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the net device. */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i, size;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			kfree(vring->va);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+
+	/* Tx. */
+	if (test_and_clear_bit(RSH_NET_TX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_TX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, false);
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(RSH_NET_RX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_RX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, true);
+	}
+
+	/* Keepalive check. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/* Nothing to do for now. */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * House-keeping timer.  Periodically flags both Rx and Tx events and
+ * kicks the worker, then re-arms itself.
+ *
+ * NOTE(review): the test_and_set_bit() return values are ignored here,
+ * so plain set_bit() would suffice.
+ */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	/* Re-arm for the next period. */
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+/*
+ * Complete the in-flight descriptor chain: place it on the used ring
+ * with the accumulated packet length, publish the new used index, and
+ * signal the virtio core under the driver-wide spinlock.
+ *
+ * NOTE(review): used->ring[idx].id is stored without cpu_to_virtio32()
+ * while .len is converted — presumably fine on little-endian; confirm.
+ */
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	idx = vr->used->idx % vring->size;
+	/* desc_head - desc gives the head index of the completed chain. */
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	/* No packet is in flight on this ring any more. */
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/*
+ * Update the link activity.
+ * @net: network instance.
+ * @activity: true brings the link up, false brings it down and resets
+ *            the per-ring packet-parsing state.
+ *
+ * Raises a virtio config-changed notification only on actual state
+ * transitions.  Caller holds net->lock.
+ */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	if (activity) {
+		/* Bring up the link. */
+		if (!(net->config.status & VIRTIO_NET_S_LINK_UP)) {
+			net->config.status |= VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+		}
+	} else {
+		/* Bring down the link. */
+		if (net->config.status & VIRTIO_NET_S_LINK_UP) {
+			int i;
+
+			net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+
+			/* Reset the ring state. */
+			for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+				net->vrings[i].pkt_len =
+						sizeof(struct virtio_net_hdr);
+				net->vrings[i].cur_len = 0;
+				net->vrings[i].rem_len = 0;
+			}
+		}
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Streams data between vring descriptor chains and the backend tmfifo
+ * (rshim_fifo_read()/rshim_fifo_write()), one packet at a time.  A
+ * partially-transferred packet is resumed via the state saved in
+ * vring->desc / cur_len / rem_len.  @is_rx selects the direction.
+ * Runs under net->lock.
+ */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header. */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			/* Header done; start on the payload. */
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Move the next payload segment through the tmfifo. */
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/*
+ * Notify callback, invoked by the virtio core when new buffers are
+ * posted on a queue.  Flags the matching event and kicks the worker
+ * unless an event is already pending.
+ */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	int event;
+
+	/*
+	 * Virtio-net maintains vrings in pairs. Odd number ring for Rx
+	 * and even number ring for Tx.
+	 */
+	event = (vring->id & 1) ? RSH_NET_TX_EVENT : RSH_NET_RX_EVENT;
+	if (!test_and_set_bit(event, &net->pend_events))
+		schedule_work(&net->work);
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Return the feature bits this device advertises to the virtio core. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	return container_of(vdev, struct rshim_net, vdev)->features;
+}
+
+/* Record the feature set negotiated by the virtio core. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	container_of(vdev, struct rshim_net, vdev)->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().  Any packet still in flight on a
+ * ring is completed first so its buffer is returned to the upper layer.
+ */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		/* Detach the vq before deleting it. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * One virtqueue is built per pre-allocated vring (the ring memory in
+ * vring->va was set up by rshim_net_alloc_vrings).  On any failure all
+ * queues created so far are torn down again.  Returns 0 on success or
+ * a negative error code.
+ */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	/* Unwind every queue created so far. */
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the cached virtio device status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct rshim_net, vdev)->status;
+}
+
+/* Cache the virtio device status byte written by the core. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = status;
+}
+
+/* Reset the device: just clear the cached status byte for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field into 'buf'.
+ *
+ * The second clause of the bounds check ('offset + len < len') catches
+ * unsigned wrap-around of offset + len.
+ */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&net->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ *
+ * The offset/len pair is bounds-checked (including unsigned wrap-around
+ * of offset + len) against the config space before copying.
+ */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		/* Fixed copy/paste: this is the set path, not get. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/*
+ * Virtio config operations backing the emulated virtio-net device.
+ * NOTE(review): this table is never modified and could likely be
+ * 'static const' — confirm the target kernel's virtio_device->config
+ * pointer type before changing.
+ */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/*
+ * Tear down an rshim_net instance: stop the timer and worker,
+ * unregister the virtio device if it was registered, free the vrings
+ * and finally the context itself.  Safe to call with a NULL or
+ * partially-initialized instance; always returns 0.
+ */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (net) {
+		/* Stop the timer. */
+		del_timer_sync(&net->timer);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&net->work);
+
+		/* Unregister virtio. */
+		if (net->virtio_registered)
+			unregister_virtio_device(&net->vdev);
+
+		/* Free vring. */
+		rshim_net_free_vrings(net);
+
+		kfree(net);
+	}
+
+	return 0;
+}
+
+/*
+ * Rx-ready callback from the backend: flag an Rx event and kick the
+ * worker.  NOTE(review): the test_and_set_bit() return value is
+ * ignored, so plain set_bit() would suffice here.
+ */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (net) {
+		test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+		schedule_work(&net->work);
+	}
+}
+
+/*
+ * Remove the network service instance attached to this backend, if any,
+ * and clear the backend's reference to it.
+ */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+	int ret = 0;
+
+	if (net) {
+		ret = rshim_net_delete_dev(net);
+		bd->net = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Create the virtio network service instance for a backend.
+ *
+ * Allocates the rshim_net context, sets up the house-keeping timer and
+ * work handler, registers the virtio device and brings the link up.
+ * Returns 0 on success, -EEXIST if the service already exists, or a
+ * negative error code on failure.
+ */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(*net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	/* timer_setup() installs the callback; no extra assignment needed. */
+	timer_setup(&net->timer, rshim_net_timer, 0);
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+	if (rshim_net_alloc_vrings(net))
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+/* Service descriptor registered with the rshim core driver. */
+struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+/* Module entry point: register the network service with the rshim core. */
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+/* Module exit: drain in-flight traffic, then deregister the service. */
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	/*
+	 * Plain call: 'return <expr>;' is not valid in a void function
+	 * in standard C.
+	 */
+	rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the USB backend driver to access the Rshim
interface on the BlueField SoC. It can be used when a USB cable
is connected to the Smart NIC or standalone device.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |    2 +-
 drivers/soc/mellanox/host/rshim_usb.c | 1035 +++++++++++++++++++++++++++++++++
 2 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index 1a282b9..c6703cd 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o
+obj-m := rshim.o rshim_net.o rshim_usb.o
 
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Module parameter to disable rshim access (kept only for compatibility). */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs. */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/*
+ * Per-device state for one RShim USB device.  Embeds the generic
+ * rshim_backend as its first member so backend pointers can be
+ * converted back with container_of().
+ */
+struct rshim_usb {
+	/* RShim backend structure. */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/* Control data, exchanged little-endian over control transfers. */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode (shared for both uses). */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/* Table of devices that work with this driver (read-only, so const). */
+static const struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/* Arguments to an urb completion handler. */
+#define URB_COMP_ARGS struct urb *urb
+
+/*
+ * Final kref release: deregister the backend from the rshim core and
+ * free the containing rshim_usb structure.
+ */
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd =
+		container_of(kref, struct rshim_backend, kref);
+
+	rshim_deregister(bd);
+	kfree(container_of(bd, struct rshim_usb, bd));
+}
+
+/* Rshim read/write routines */
+
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read and endian conversion. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	/*
+	 * The RShim HW puts bytes on the wire in little-endian order
+	 * regardless of endianness settings either in the host or the ARM
+	 * cores.
+	 */
+	*result = le64_to_cpu(dev->ctrl_data);
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/*
+ * Write one 64-bit rshim register via a blocking USB control transfer.
+ *
+ * Returns 0 on success, -ENODEV if the rshim is absent, -EBADE/-EBADR
+ * for over-/under-length transfers, or the USB stack's negative error.
+ */
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_OUT,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Boot routines */
+
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT)
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	else if (urb->status)
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+/*
+ * Write a chunk of the boot stream to the boot FIFO endpoint.
+ *
+ * Submits a bulk urb and sleeps interruptibly (with bd->mutex dropped,
+ * see the comment below) until the completion callback fires.  Returns
+ * the number of bytes actually transferred, or a negative error code if
+ * nothing was written.
+ */
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause stuck.
+	 * Here we release the mutex so user could use 'ctrl + c' to terminate
+	 * the current write. Once the boot file is opened again, the
+	 * outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		/* Interrupted: cancel the urb, report what did go out. */
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Launch the next FIFO input I/O: a bulk read when data is known to be
+ * pending (non-zero *intr_buf or leftover read-buffer bytes), otherwise
+ * an interrupt poll that reports how many bytes are available.
+ * Completion is reported asynchronously via
+ * rshim_usb_fifo_read_callback().
+ *
+ * NOTE(review): both "resubmitted ... urb" debug messages print even
+ * when usb_submit_urb() failed just above — presumably only cosmetic.
+ */
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_drain: failed submitting read "
+			      "urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted read urb\n");
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read_callback: failed submitting "
+			      "interrupt urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted interrupt urb\n");
+	}
+}
+
+/*
+ * Completion handler for the FIFO bulk-out write urb.  Mirrors the read
+ * callback: success wakes writers and notifies the upper layer;
+ * cancellation is ignored; device-gone and fatal errors are reported;
+ * transient errors are retried a bounded number of times.
+ */
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Start an asynchronous bulk-out write of 'count' bytes from the DMA
+ * write buffer.  Returns 0 if the urb was submitted, -1 otherwise.
+ *
+ * NOTE(review): returning -1 instead of the usb_submit_urb() error code
+ * loses information — confirm callers only test for non-zero.
+ */
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	/* The tmfifo protocol moves data in 8-byte words. */
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/* These make the endpoint test code in rshim_usb_probe() a lot cleaner. */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+/* NOTE(review): max_pkt() is unused in the code visible here. */
+#define max_pkt(ep)    le16_to_cpu(ep->wMaxPacketSize)
+#define ep_addr(ep)    (ep->bEndpointAddress)
+
+/*
+ * rshim_backend ->read hook: kick off an asynchronous FIFO read (or
+ * interrupt poll) into @buf.  The data arrives later via
+ * rshim_usb_fifo_read_callback(), so 0 is returned immediately rather
+ * than a byte count.  Returns -EINVAL for an unsupported @devtype.
+ */
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		rshim_usb_fifo_read(dev, buf, count);
+		return 0;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * rshim_backend ->write hook: dispatch @buf/@count to the tmfifo path
+ * (asynchronous, returns 0 on successful submission) or the boot-stream
+ * path (blocking, returns bytes written) based on @devtype.
+ */
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		return rshim_usb_fifo_write(dev, buf, count);
+
+	case RSH_DEV_TYPE_BOOT:
+		return rshim_usb_boot_write(dev, buf, count);
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * rshim_backend ->cancel hook: synchronously cancel the outstanding urb
+ * for the given device type and direction.  usb_kill_urb() tolerates a
+ * NULL or idle urb, so no extra checks are needed here.
+ */
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (is_write)
+			usb_kill_urb(dev->write_urb);
+		else
+			usb_kill_urb(dev->read_or_intr_urb);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		usb_kill_urb(dev->boot_urb);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		break;
+	}
+}
+
+/*
+ * Probe one of the two USB interfaces (rshim control, subclass 0, or
+ * tmfifo, subclass 1) of a BlueField device.  Both interfaces share a
+ * single rshim_usb/rshim_backend structure, looked up by USB device
+ * pathname, so the second probe reuses the structure created by the
+ * first.  Returns 0 on success or a negative errno on failure.
+ *
+ * Fixes vs. previous revision: the rshim_register() failure path now
+ * releases bd->mutex before jumping to the error path (it previously
+ * leaked the locked mutex into kref_put()/kfree()), and pointer checks
+ * use !ptr instead of comparing against 0.
+ */
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		/* The backend takes ownership of usb_dev_name here. */
+		bd = &dev->bd;
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	allocfail |= !bd->read_buf;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= !dev->intr_buf;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= !bd->write_buf;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= !dev->read_or_intr_urb;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= !dev->write_urb;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			/* Don't carry the locked mutex into the error path. */
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	if (dev) {
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	kfree(usb_dev_name);
+	return retval;
+}
+
+/*
+ * Disconnect callback, invoked once per interface.  For the rshim
+ * control interface we only clear its state; for the tmfifo interface
+ * we cancel the console work, kill and free the urbs, and release the
+ * coherent DMA buffers.  When neither interface remains, the usb_device
+ * reference is dropped.  Each call drops one backend kref.
+ */
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;
+
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		/* Kill before free: the urbs must be idle when released. */
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	if (!bd->has_rshim && !bd->has_tm) {
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+/* USB driver glue: match table plus probe/disconnect entry points. */
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+/* Module init: register with the USB core; returns 0 or -errno. */
+static int __init rshim_usb_init(void)
+{
+	int result;
+
+	/* Register this driver with the USB subsystem. */
+	result = usb_register(&rshim_usb_driver);
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unregister; the USB core disconnects any bound devices. */
+static void __exit rshim_usb_exit(void)
+{
+	/* Deregister this driver with the USB subsystem. */
+	usb_deregister(&rshim_usb_driver);
+}
+
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the USB backend driver to access the Rshim
interface on the BlueField SoC. It can be used when a USB cable
is connected to the Smart NIC or standalone device.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |    2 +-
 drivers/soc/mellanox/host/rshim_usb.c | 1035 +++++++++++++++++++++++++++++++++
 2 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index 1a282b9..c6703cd 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o
+obj-m := rshim.o rshim_net.o rshim_usb.o
 
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg at kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Module parameter: non-zero disables RShim access (kept for
+ * command-line compatibility; marked obsoleted).
+ */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs. */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/*
+ * Per-device state.  Embeds the generic rshim_backend (which carries the
+ * shared FIFO buffers, locks and kref) plus the USB-specific endpoints,
+ * urbs and retry counters.
+ */
+struct rshim_usb {
+	/* RShim backend structure. */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/* Control data. */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode. */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/*
+ * Table of devices that work with this driver.  Declared const, as the
+ * USB core only needs a const pointer (struct usb_driver.id_table) and
+ * kernel convention is to keep ID tables in read-only data.
+ */
+static const struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/* Arguments to an urb completion handler.  NOTE(review): presumably a
+ * hook for kernels whose completion handlers took extra arguments —
+ * confirm before removing.
+ */
+#define URB_COMP_ARGS struct urb *urb
+
+/*
+ * kref release function: deregister the backend and free the combined
+ * rshim_usb/rshim_backend structure.  Callers in this file wrap the
+ * kref_put() in rshim_lock()/rshim_unlock().
+ */
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_usb *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_usb, bd);
+
+	rshim_deregister(bd);
+	kfree(dev);
+}
+
+/* Rshim read/write routines */
+
+/*
+ * Read the 64-bit RShim register selected by (@chan, @addr) via a
+ * blocking vendor control transfer (2 s timeout) and return it in
+ * *@result in CPU byte order.  Returns 0 on success, -ENODEV if the
+ * rshim interface isn't present, -EBADR/-EBADE on short/long reads,
+ * or the usb_control_msg() error code.
+ */
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read and endian conversion. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	/*
+	 * The RShim HW puts bytes on the wire in little-endian order
+	 * regardless of endianness settings either in the host or the ARM
+	 * cores.
+	 */
+	*result = le64_to_cpu(dev->ctrl_data);
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/*
+ * Write the 64-bit @value to the RShim register selected by
+ * (@chan, @addr) via a blocking vendor control transfer (2 s timeout).
+ * Returns 0 on success, -ENODEV without the rshim interface,
+ * -EBADR/-EBADE on short/long writes, or the usb_control_msg() error.
+ *
+ * NOTE(review): dev->ctrl_data is declared u64 but stores a
+ * little-endian value here; sparse would prefer __le64 — verify.
+ */
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_OUT,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Boot routines */
+
+/*
+ * Completion handler for the boot-stream urb: log abnormal status and
+ * wake the waiter sleeping in rshim_usb_boot_write().
+ */
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT)
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	else if (urb->status)
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+/*
+ * Write @count bytes of boot-stream data synchronously: submit one bulk
+ * urb to the boot FIFO endpoint and sleep interruptibly (with bd->mutex
+ * temporarily dropped) until it completes or the user interrupts.
+ * Returns the number of bytes actually written, or a negative errno if
+ * nothing was written.
+ */
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause a hang.
+	 * Here we release the mutex so the user could use 'ctrl + c' to
+	 * terminate the current write. Once the boot file is opened again,
+	 * the outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		/* Interrupted: cancel and report whatever got out. */
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+/*
+ * Completion handler shared by the bulk-read urb and the interrupt urb
+ * (dev->read_urb_is_intr tells them apart).  On success the new data is
+ * published and the upper layer notified; transient errors (-EPROTO,
+ * -EILSEQ, -EOVERFLOW with no data) are retried up to READ_RETRIES
+ * times; everything else raises RSH_EVENT_FIFO_ERR.  Takes bd->spinlock
+ * for the duration of the handler.
+ */
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Start the next inbound transfer.  If the device has signalled pending
+ * data (*dev->intr_buf) or read_buf_bytes is non-zero, submit a bulk
+ * read of up to @count bytes into @buffer; otherwise submit the
+ * interrupt urb to poll for data.  Either way completion is handled by
+ * rshim_usb_fifo_read_callback().
+ *
+ * NOTE(review): bd->spin_flags is updated without taking bd->spinlock —
+ * presumably the caller holds it; verify against the rshim core.
+ */
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		/* @buffer is in the pre-mapped coherent read buffer. */
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_drain: failed submitting read "
+			      "urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted read urb\n");
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read_callback: failed submitting "
+			      "interrupt urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted interrupt urb\n");
+	}
+}
+
+/*
+ * Completion handler for the tmfifo write urb.  On success, wake any
+ * writers and notify the upper layer; transient errors (-EPROTO,
+ * -EILSEQ, -EOVERFLOW with no data) are retried up to WRITE_RETRIES
+ * times; everything else raises RSH_EVENT_FIFO_ERR.  Takes bd->spinlock
+ * for the duration of the handler.
+ */
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Start an asynchronous bulk-OUT transfer of @count bytes from @buffer to
+ * the tmfifo "out" endpoint.  Completion (or retry/error handling) happens
+ * in rshim_usb_fifo_write_callback().
+ *
+ * NOTE(review): bd->spin_flags is modified here without taking bd->spinlock,
+ * so the caller presumably holds it — verify against the rshim core.
+ *
+ * Returns 0 if the urb was submitted, -1 on submission failure.
+ */
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	/* The shared FIFO protocol moves data in 8-byte units. */
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	/* @buffer lives in the pre-mapped coherent write buffer; skip DMA map. */
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		/* NOTE(review): returns -1, not -errno like other paths. */
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/* These make the endpoint test code in rshim_usb_probe() a lot cleaner. */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+/* NOTE(review): max_pkt() is unused in the code visible here. */
+#define max_pkt(ep)    le16_to_cpu(ep->wMaxPacketSize)
+#define ep_addr(ep)    (ep->bEndpointAddress)
+
+/*
+ * Backend read hook: start a FIFO read for net/console traffic.
+ * Returns 0 once the read has been kicked off, -EINVAL for any other
+ * device type.
+ */
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	rshim_usb_fifo_read(dev, buf, count);
+	return 0;
+}
+
+/*
+ * Backend write hook: dispatch the buffer to the FIFO or boot channel
+ * depending on the device type.  Returns the channel write's result, or
+ * -EINVAL for an unknown type.
+ */
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	if (devtype == RSH_DEV_TYPE_NET || devtype == RSH_DEV_TYPE_CONSOLE)
+		return rshim_usb_fifo_write(dev, buf, count);
+
+	if (devtype == RSH_DEV_TYPE_BOOT)
+		return rshim_usb_boot_write(dev, buf, count);
+
+	pr_err("bad devtype %d\n", devtype);
+	return -EINVAL;
+}
+
+/*
+ * Backend cancel hook: kill the in-flight URB matching the device type
+ * and direction.  usb_kill_urb() on a NULL URB is a safe no-op.
+ */
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	struct urb *urb = NULL;
+
+	if (devtype == RSH_DEV_TYPE_NET || devtype == RSH_DEV_TYPE_CONSOLE)
+		urb = is_write ? dev->write_urb : dev->read_or_intr_urb;
+	else if (devtype == RSH_DEV_TYPE_BOOT)
+		urb = dev->boot_urb;
+	else
+		pr_err("bad devtype %d\n", devtype);
+
+	if (urb)
+		usb_kill_urb(urb);
+}
+
+/*
+ * Probe callback, invoked once per USB interface.  The BlueField device
+ * exposes the boot/rshim channel (subclass 0) and the tmfifo channel
+ * (subclass 1) as separate vendor-specific interfaces, so this can run
+ * twice for one physical device; the shared rshim_usb state is found
+ * again by USB path name on the second call.
+ */
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		/* The existing backend keeps its original name string. */
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		/* Wire up backend callbacks; the name is now owned by bd. */
+		bd = &dev->bd;
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 *
+	 * Failures are accumulated in 'allocfail' ("ptr == 0" means a NULL
+	 * allocation result) and checked once at the end.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	allocfail |= bd->read_buf == 0;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= dev->intr_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->read_or_intr_urb == 0;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->write_urb == 0;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		/* Unknown subclass: not one of our interfaces. */
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	/* Free whatever was allocated; all the free helpers accept NULL. */
+	if (dev) {
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		/* Drop the reference taken (or created) above. */
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	kfree(usb_dev_name);
+	return retval;
+}
+
+/*
+ * Disconnect callback, invoked once per interface.  Full teardown of the
+ * shared state (usb_put_dev and the final kref drop) only happens once
+ * both has_rshim and has_tm read false.
+ */
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;
+
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 *
+		 * NOTE(review): has_tm is not cleared in this branch;
+		 * presumably the DETACH notification above does it —
+		 * verify, since the final-teardown check below depends
+		 * on it.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		/* Kill and free both URBs, then the coherent buffers. */
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	if (!bd->has_rshim && !bd->has_tm) {
+		/* Second disconnect: release the USB device reference. */
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+/* Module entry point: hook this driver into the USB core. */
+static int __init rshim_usb_init(void)
+{
+	int result = usb_register(&rshim_usb_driver);
+
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit point: detach this driver from the USB core. */
+static void __exit rshim_usb_exit(void)
+{
+	usb_deregister(&rshim_usb_driver);
+}
+
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe backend driver
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe backend driver to access the Rshim
interface on the BlueField SoC, such as on the Smart NIC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile     |   2 +-
 drivers/soc/mellanox/host/rshim_pcie.c | 478 +++++++++++++++++++++++++++++++++
 2 files changed, 479 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index c6703cd..fa4b21c 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RShim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR0 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size of the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Per-device state for one RShim PCIe backend instance. */
+struct rshim_pcie {
+	/* Embedded RShim backend state (container_of is used to recover
+	 * the enclosing rshim_pcie from a backend pointer).
+	 */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/*
+	 * Number of 8-byte writes since the last draining read; see
+	 * rshim_pcie_write() for why this is capped at 15.
+	 */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/*
+ * Wait until the RSH_BYTE_ACC_CTL pending bit is cleared.
+ *
+ * Polls the byte-access widget control register of channel @chan until
+ * RSH_BYTE_ACC_PENDING drops.  Returns 0 when the widget is idle, or
+ * -EINTR if a signal is delivered to the current task while polling.
+ *
+ * NOTE(review): the poll is unbounded; if the hardware never clears the
+ * bit this only returns on a signal — a timeout may be worth adding.
+ */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		/* Let a pending signal interrupt the wait. */
+		if (signal_pending(current))
+			return -EINTR;
+
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read/write methods for 32-bit systems.
+ *
+ * An RShim register access is 8 bytes wide, but a 32-bit host cannot
+ * issue a single 8-byte MMIO access, so each access is split into two
+ * 4-byte accesses through the RShim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* First RSH_BYTE_ACC_RDAT word; placed in bits 63..32. */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Second RSH_BYTE_ACC_RDAT word; placed in bits 31..0. */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	/* The assembled value is big-endian; convert to CPU byte order. */
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * Do an 8-byte RShim register write as two 4-byte writes through the
+ * byte-access widget.  Returns 0 on success or -EINTR if interrupted
+ * while waiting for the widget.
+ */
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/*
+	 * Write control bits to RSH_BYTE_ACC_CTL again.
+	 * NOTE(review): this repeats the CTL write above — confirm whether
+	 * the widget requires it or it is a copy-paste leftover.
+	 */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write bits 63..32 of the value to RSH_BYTE_ACC_WDAT. */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write bits 31..0 of the value to RSH_BYTE_ACC_WDAT. */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/* RShim read/write routines */
+/*
+ * Read an 8-byte RShim register.  Returns 0 on success (result stored in
+ * *result) or a negative errno.  Also resets the posted-write streak
+ * counter used by rshim_pcie_write(), since a read drains posted writes.
+ */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* A read flushes posted writes, so restart the write streak. */
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+/*
+ * Write an 8-byte RShim register.  Returns 0 on success or a negative
+ * errno.
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		/* The draining read also resets write_count to zero. */
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+/* kref release callback: deregister and free one PCIe backend instance. */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd = container_of(kref, struct rshim_backend,
+						kref);
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/*
+ * Probe routine: set up (or re-attach) the RShim PCIe backend for one
+ * BlueField device.  Per-device state is allocated on first probe and
+ * reused when the same slot name is probed again.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	bool created = false;
+	int index, retval, err = 0;
+	const int max_name_len = 20;
+
+	/* Pick the first free slot in the instance table. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		/*
+		 * Re-probe: reuse the existing backend.  It keeps its
+		 * original name, so drop our copy here (previously leaked).
+		 */
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+		kfree(pcie_dev_name);
+		pcie_dev_name = NULL;
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+		created = true;
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFO and I/O buffers (no-ops on a re-probe). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+
+	if (!bd->read_buf || !bd->write_buf) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;	/* previously left at 0: probe "succeeded" */
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			err = retval;	/* previously left at 0 */
+			goto rshim_map_failed;
+		} else
+			pcie_dev_name = NULL;	/* now owned by the backend */
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;		/* previously left at 0 */
+		goto rshim_map_failed;
+	}
+
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	rshim_lock();
+	/* Don't leave a dangling pointer to the soon-to-be-freed device. */
+	if (created)
+		instances[index] = NULL;
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 *
+	 * NOTE(review): this is done indirectly by raising SWINT3 via the
+	 * RSH_SWINT write below — confirm the SoC side services that
+	 * interrupt by resetting those registers.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	/* Stop the console worker and free buffers under the mutex. */
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop the probe's reference; may free dev via rshim_pcie_delete. */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* PCI IDs this driver binds to (BlueField RShim PCIe function). */
+static const struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module entry point: hook this driver into the PCI core. */
+static int __init rshim_pcie_init(void)
+{
+	int result = pci_register_driver(&rshim_pcie_driver);
+
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit point: detach this driver from the PCI core. */
+static void __exit rshim_pcie_exit(void)
+{
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe backend driver
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the PCIe backend driver to access the Rshim
interface on the BlueField SoC, such as on the Smart NIC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile     |   2 +-
 drivers/soc/mellanox/host/rshim_pcie.c | 478 +++++++++++++++++++++++++++++++++
 2 files changed, 479 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index c6703cd..fa4b21c 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RShim access (module parameter kept for compatibility only). */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR2 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/*
+ * Wait until the RSH_BYTE_ACC_CTL pending bit is cleared.
+ *
+ * Returns 0 once the byte-access widget is idle, or -EINTR if a signal
+ * arrives while polling.
+ *
+ * NOTE(review): unbounded busy-wait with no cpu_relax() or timeout; a
+ * hung device makes this spin until a signal is delivered -- confirm
+ * this is acceptable or add a timeout.
+ */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		/* The channel is encoded in bits [16+] of the offset. */
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read for 32-bit systems: perform one 8-byte RShim read as two
+ * 4-byte reads through the RShim Byte Access Widget.
+ *
+ * Returns 0 on success or -EINTR if interrupted while polling.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Program the access size in RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/*
+	 * First RSH_BYTE_ACC_RDAT read.  The word is accumulated into the
+	 * upper half and the final be64_to_cpu() fixes the byte order.
+	 * NOTE(review): the original comment called this the "lower"
+	 * 32 bits while shifting it into the high 32 bits -- confirm the
+	 * word order against the widget documentation.
+	 */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Second RSH_BYTE_ACC_RDAT read: remaining 32 bits of data */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * RShim write for 32-bit systems: perform one 8-byte RShim write as two
+ * 4-byte writes through the RShim Byte Access Widget.
+ *
+ * Returns 0 on success or -EINTR if interrupted while polling.
+ */
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Program the access size in RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/*
+	 * NOTE(review): RSH_BYTE_ACC_CTL is written with the same value a
+	 * second time here; this looks like a copy/paste duplicate of the
+	 * write above -- confirm whether the hardware requires it.
+	 */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/*
+	 * First data word to RSH_BYTE_ACC_WDAT (the original comment named
+	 * TRIO_CR_GW_DATA, which is a different register).
+	 */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Second data word to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/*
+ * RShim read routine (rshim_backend::read_rshim).
+ *
+ * Reads one 8-byte register at (chan, addr).  On 64-bit kernels this is
+ * a single readq(); on 32-bit kernels it goes through the Byte Access
+ * Widget.  Also resets write_count, since any read drains previously
+ * posted writes (see rshim_pcie_write()).
+ */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+/*
+ * RShim write routine (rshim_backend::write_rshim).
+ *
+ * Writes one 8-byte register at (chan, addr).  Every 15th consecutive
+ * write is preceded by a draining read so posted PCIe writes do not
+ * overrun the RShim.
+ *
+ * NOTE(review): the draining rshim_pcie_read() return value is ignored
+ * (best-effort); it also resets write_count back to 0, so the counter
+ * restarts after each drain.
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+/*
+ * kref release callback: deregister the backend, detach it from the
+ * PCI device's drvdata, and free the driver state.
+ *
+ * NOTE(review): the instances[] slot pointing at 'dev' is not cleared
+ * here, leaving a dangling entry -- confirm intended lifetime of that
+ * table.
+ */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/*
+ * Probe routine: look up or allocate the backend state for a BlueField
+ * device, allocate its FIFO/buffers, map the RShim register window and
+ * register the backend with the rshim core.
+ *
+ * Fixes vs. the original:
+ *  - rshim_fifo_alloc() was called twice; one call suffices.
+ *  - 'err' was left 0 on the buffer-allocation, rshim_register() and
+ *    rshim_notify() failure paths, so probe reported success after
+ *    tearing everything down.
+ *  - pointer checks use !ptr instead of comparing against 0.
+ *  - the duplicate device-name string is freed on the success path when
+ *    ownership was not transferred to the backend.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	int index, retval, err = 0;
+	const int max_name_len = 20;
+
+	/* Find a free instance slot. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Reuse an existing backend of the same name, if any. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		/*
+		 * NOTE(review): instances[index] is never cleared when the
+		 * device is deleted -- confirm the intended lifetime.
+		 */
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFO and the read/write buffers (once each). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!bd->read_buf || !bd->write_buf) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			err = retval;
+			rshim_unlock();
+			goto rshim_map_failed;
+		}
+		/* The backend now owns the name string. */
+		pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;
+		goto rshim_map_failed;
+	}
+
+	/* No-op if ownership was transferred above. */
+	kfree(pcie_dev_name);
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	rshim_lock();
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/*
+ * Called via pci_unregister_driver() when the module is removed.
+ *
+ * Detaches and tears down the backend, unmaps the register window and
+ * disables the PCI device.
+ *
+ * NOTE(review): the comment below names TRIO registers, but the code
+ * raises SWINT3 -- presumably firmware performs the actual reset on
+ * that interrupt; confirm.  The rshim_pcie_write() return value is
+ * ignored here, while the livefish variant logs it.
+ */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* Device IDs handled by this driver; const as checkpatch recommends. */
+static const struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+/* PCI driver glue binding the probe/remove callbacks to the ID table. */
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module init: register this driver with the PCI core. */
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unbind all devices and unregister the driver. */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish backend driver
  2018-11-01 16:25   ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe live-fish backend driver to access the
Rshim interface on the BlueField SoC, such as on the Smart NIC.
Access through this interface is slow; it is intended for live-fish
mode, i.e. when the NIC firmware hasn't been programmed yet.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile        |   2 +-
 drivers/soc/mellanox/host/rshim_pcie_lf.c | 695 ++++++++++++++++++++++++++++++
 2 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index fa4b21c..79a1c86 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RShim access (module parameter kept for compatibility only). */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Mellanox Address & Data Capabilities */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/*
+ * Read one 32-bit word of CR space via the hidden Mellanox PCI
+ * capability pair (address register at 0x58, data at 0x5c).
+ *
+ * Returns 0 on success or the pci_*_config_dword() error code.
+ *
+ * NOTE(review): the pci_cap_* names shadow the PCI core's own
+ * pci_cap_* function namespace -- consider a driver-specific prefix.
+ */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Set LSB to indicate a read operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset | MELLANOX_CAP_READ);
+	if (retval)
+		return retval;
+
+	/* Read result from MELLANOX_DATA */
+	retval = pci_read_config_dword(pci_dev, MELLANOX_DATA,
+				result);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * Write one 32-bit word of CR space via the hidden Mellanox PCI
+ * capability pair.  Data is latched first; writing the address (with
+ * LSB clear) then triggers the write.
+ *
+ * Returns 0 on success or the pci_write_config_dword() error code.
+ */
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int retval;
+
+	/* Write data to MELLANOX_DATA */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_DATA,
+				value);
+	if (retval)
+		return retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Leave LSB clear to indicate a write operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * Acquire the TRIO_CR_GW_LOCK.
+ *
+ * Returns 0 once the lock is taken, -EINTR if interrupted, or a config
+ * access error code.
+ *
+ * NOTE(review): unbounded poll loop with no timeout; also, this
+ * poll-then-write sequence is not atomic, so two hosts racing for the
+ * lock could both believe they acquired it -- confirm the hardware
+ * arbitrates this.
+ */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	/* Wait until TRIO_CR_GW_LOCK is free */
+	do {
+		retval = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK,
+				&read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_ACQUIRED);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Release the TRIO_CR_GW_LOCK; returns the config access error code. */
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	int retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_RELEASE);
+
+	return retval;
+}
+
+/*
+ * Read one 32-bit word from the RShim via the TRIO_CR_GATEWAY.
+ *
+ * Takes the gateway lock, programs address/size, triggers the read and
+ * fetches the data word.  Returns 0 on success or an error code.
+ *
+ * NOTE(review): every early return between lock acquire and release
+ * leaves TRIO_CR_GW_LOCK held, wedging all later gateway accesses --
+ * consider releasing the lock on the failure paths.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_READ_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * Write one 32-bit word to the RShim via the TRIO_CR_GATEWAY.
+ *
+ * Mirrors trio_cr_gw_read(): lock, data, address, size, trigger,
+ * unlock.  Returns 0 on success or an error code.
+ *
+ * NOTE(review): as in trio_cr_gw_read(), early returns on failure
+ * leave TRIO_CR_GW_LOCK held.
+ */
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_WRITE_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * Wait until the RSH_BYTE_ACC_CTL pending bit is cleared.
+ *
+ * Returns 0 once the byte-access widget is idle, -EINTR if a signal is
+ * pending, or a gateway access error.
+ *
+ * Fix: the loop previously masked the status with
+ * (RSH_CHANNEL1_BASE + RSH_BYTE_ACC_PENDING), mixing an address
+ * constant into a status-bit mask, so the loop could spin on (or exit
+ * on) unrelated bits.  Only RSH_BYTE_ACC_PENDING is a status bit.
+ */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * Perform one 8-byte RShim read as two 4-byte accesses through the
+ * RShim Byte Access Widget, itself reached via the TRIO_CR_GATEWAY.
+ *
+ * Returns 0 on success or an error code from the gateway helpers.
+ *
+ * NOTE(review): as in rshim_pcie.c, the first RDAT word is shifted
+ * into the upper 32 bits although the original comment called it the
+ * "lower" word; the final be64_to_cpu() fixes byte order.  Confirm the
+ * word ordering against the widget documentation.
+ */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* First RSH_BYTE_ACC_RDAT read: goes into the upper 32 bits */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Second RSH_BYTE_ACC_RDAT read: remaining 32 bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * Perform one 8-byte RShim write as two 4-byte accesses through the
+ * RShim Byte Access Widget (via the TRIO_CR_GATEWAY).
+ *
+ * Returns 0 on success or an error code from the gateway helpers.
+ */
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/*
+	 * NOTE(review): RSH_BYTE_ACC_CTL is written with the same value a
+	 * second time here, apparently a copy/paste duplicate -- confirm
+	 * whether the hardware requires it.
+	 */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/*
+	 * First data word to RSH_BYTE_ACC_WDAT (the original comment named
+	 * TRIO_CR_GW_DATA, a different register).
+	 */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Second data word to RSH_BYTE_ACC_WDAT */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ *
+ * NOTE(review): the high word (value >> 32) is written first; the
+ * caller has already byte-swapped 'value' with be64_to_cpu(), so
+ * confirm this ordering matches the holding-register semantics.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* First 32-bit half of the data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Second 32-bit half of the data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * RShim read routine (rshim_backend::read_rshim) for livefish mode.
+ *
+ * Translates (chan, addr) into an absolute RShim address and reads it
+ * through the Byte Access Widget.  Resets write_count since any read
+ * drains outstanding writes (see rshim_pcie_write()).
+ *
+ * NOTE(review): be32_to_cpu() on 'addr' byte-swaps the address on
+ * little-endian hosts only -- presumably the gateway expects the
+ * address in big-endian byte order; confirm against the TRIO_CR_GW
+ * documentation.
+ */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	addr = be32_to_cpu(addr);
+
+	retval = rshim_byte_acc_read(pci_dev, addr, result);
+
+	return retval;
+}
+
+/*
+ * RShim write routine (rshim_backend::write_rshim) for livefish mode.
+ *
+ * Boot-stream writes (addr == RSH_BOOT_FIFO_DATA) go through the boot
+ * FIFO holding register; everything else uses the Byte Access Widget.
+ * A draining read is issued every 7 writes (each 8-byte write costs two
+ * 4-byte gateway writes).
+ *
+ * NOTE(review): the draining rshim_pcie_read() return value is ignored
+ * (best-effort); 'addr' and 'value' are byte-swapped before use --
+ * presumably for the gateway's big-endian layout; confirm.
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+/*
+ * kref release callback: deregister the backend, detach it from the
+ * PCI device's drvdata, and free the driver state.
+ *
+ * NOTE(review): the instances[] slot pointing at 'dev' is not cleared
+ * here, leaving a dangling entry -- confirm intended lifetime of that
+ * table.
+ */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/*
+ * Probe routine (livefish): look up or allocate the backend state,
+ * allocate its FIFO/buffers and register the backend with the rshim
+ * core.  No BAR mapping is needed; all access goes through config
+ * space.
+ *
+ * Fixes vs. the original:
+ *  - rshim_fifo_alloc() was called twice; one call suffices.
+ *  - 'err' was left 0 on the buffer-allocation, rshim_register() and
+ *    rshim_notify() failure paths, so probe reported success after
+ *    tearing everything down.
+ *  - pointer checks use !ptr instead of comparing against 0.
+ *  - the name string is handed over to the backend on registration and
+ *    freed otherwise, matching rshim_pcie.c and avoiding a leak or a
+ *    potential double free on the later error paths.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	int index, retval, err = 0;
+	const int max_name_len = 20;
+
+	/* Find a free instance slot. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Reuse an existing backend of the same name, if any. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFO and the read/write buffers (once each). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		pr_err("Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!bd->read_buf || !bd->write_buf) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			err = retval;
+			rshim_unlock();
+			goto register_failed;
+		}
+		/* The backend now owns the name string. */
+		pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;
+		goto register_failed;
+	}
+
+	/* No-op if ownership was transferred above. */
+	kfree(pcie_dev_name);
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/*
+ * Called via pci_unregister_driver() when the module is removed.
+ *
+ * Detaches and tears down the backend and disables the PCI device.
+ */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Fix: guard against NULL drvdata (the rshim_pcie.c backend
+	 * already checks this; here dev was dereferenced unconditionally).
+	 */
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* Device IDs handled by this driver; const as checkpatch recommends. */
+static const struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+/* PCI driver glue binding the probe/remove callbacks to the ID table. */
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module init: register this driver with the PCI core. */
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unbind all devices and unregister the driver. */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish backend driver
@ 2018-11-01 16:25   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the PCIe live-fish backend driver to access the
Rshim interface on the BlueField SoC, such as on the Smart NIC.
Access through this interface is slow, but it can be used in live-fish
mode when the NIC firmware hasn't been programmed yet.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile        |   2 +-
 drivers/soc/mellanox/host/rshim_pcie_lf.c | 695 ++++++++++++++++++++++++++++++
 2 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index fa4b21c..79a1c86 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RShim access (parameter kept for backward compatibility only). */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/*
+ * PCI vendor/device IDs.  0x15b3 is the Mellanox vendor ID; the
+ * TILERA_ prefix is historical (BlueField descends from the Tilera
+ * architecture).
+ */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/*
+ * Mellanox Address & Data Capabilities: hidden config-space registers
+ * used as an address/data window into the CR space (see pci_cap_read/
+ * pci_cap_write below).
+ */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+/* Per-device driver state for one livefish PCIe function. */
+struct rshim_pcie {
+	/* RShim backend structure (kref, buffers, backend ops). */
+	struct rshim_backend	bd;
+
+	/* Underlying PCI device; set in probe, cleared on delete. */
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/* Mechanism to access the CR space using hidden PCI capabilities */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int rc;
+
+	/*
+	 * Post the target offset to MELLANOX_ADDR with the LSB set,
+	 * which marks the transaction as a read request.
+	 */
+	rc = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				    offset | MELLANOX_CAP_READ);
+	if (rc)
+		return rc;
+
+	/* The device latches the result into MELLANOX_DATA. */
+	return pci_read_config_dword(pci_dev, MELLANOX_DATA, result);
+}
+
+/* Write one CR-space word through the hidden capability window. */
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int rc;
+
+	/* Stage the data word first... */
+	rc = pci_write_config_dword(pci_dev, MELLANOX_DATA, value);
+	if (rc)
+		return rc;
+
+	/*
+	 * ...then post the offset with the LSB clear, which commits the
+	 * staged word as a write transaction.
+	 */
+	return pci_write_config_dword(pci_dev, MELLANOX_ADDR, offset);
+}
+
+/* Acquire and release the TRIO_CR_GW_LOCK. */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	u32 lock_word;
+	int rc;
+
+	/* Busy-poll until the lock is free; bail out on a signal. */
+	do {
+		rc = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK, &lock_word);
+		if (rc)
+			return rc;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (lock_word & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/* Claim the gateway lock. */
+	return pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+			     TRIO_CR_GW_LOCK_ACQUIRED);
+}
+
+/* Drop the TRIO_CR_GW_LOCK so other agents may use the gateway. */
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	return pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+			     TRIO_CR_GW_LOCK_RELEASE);
+}
+
+/*
+ * Mechanism to access the RShim from the CR space using the
+ * TRIO_CR_GATEWAY.
+ *
+ * Fix vs. the original: every failure between lock acquisition and
+ * release used to return early with the gateway lock still held,
+ * wedging all subsequent gateway transactions.  The lock is now
+ * always released on the way out.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval, err;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		goto unlock;
+
+	/* Set TRIO_CR_GW_READ_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		goto unlock;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		goto unlock;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+
+unlock:
+	/* Release TRIO_CR_GW_LOCK; report the first error seen. */
+	err = trio_cr_gw_lock_release(pci_dev);
+
+	return retval ? retval : err;
+}
+
+/*
+ * Write a 32-bit RShim word through the TRIO_CR_GATEWAY.
+ *
+ * Fix vs. the original: failures after lock acquisition used to
+ * return early with TRIO_CR_GW_LOCK still held; the lock is now
+ * always released on the way out.
+ */
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval, err;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		goto unlock;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		goto unlock;
+
+	/* Set TRIO_CR_GW_WRITE_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		goto unlock;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+
+unlock:
+	/* Release TRIO_CR_GW_LOCK; report the first error seen. */
+	err = trio_cr_gw_lock_release(pci_dev);
+
+	return retval ? retval : err;
+}
+
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+		/*
+		 * Fix: test only the PENDING status bit.  The original
+		 * code added RSH_CHANNEL1_BASE (an address offset, not a
+		 * bit mask) into the test, making the loop also spin on
+		 * unrelated bits of the CTL register value.
+		 */
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/*
+	 * First RSH_BYTE_ACC_RDAT word.  It is accumulated into the
+	 * upper half and moved into place by the be64_to_cpu() swap
+	 * below; the original "lower 32-bits" comment described the
+	 * post-swap view on a little-endian host.
+	 */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Second RSH_BYTE_ACC_RDAT word, filling the lower half. */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	/* Byte-swap to mirror the pre-swap done on the write path. */
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/* 8-byte RShim write as two 4-byte Byte Access Widget transactions. */
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/*
+	 * NOTE(review): RSH_BYTE_ACC_SIZE is written to CTL a second
+	 * time here, which looks redundant with the write above —
+	 * confirm against the Byte Access Widget documentation before
+	 * removing it.
+	 */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/*
+	 * First data word: bits 63:32 of 'value'.  The caller byte-
+	 * swapped the datum with be64_to_cpu(), so the original
+	 * "lower 32 bits" comment matched the pre-swap view.
+	 */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Second data word: bits 31:0 of 'value'. */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/*
+	 * First half: bits 63:32 of 'value'.  The caller byte-swapped
+	 * the datum with be64_to_cpu(), so this is the "lower 32 bits"
+	 * of the original datum on a little-endian host.
+	 */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Second half: bits 31:0 of 'value'. */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* A read drains posted writes, so restart the streaming count. */
+	dev->write_count = 0;
+
+	/*
+	 * Fold the channel into the register address and byte-swap it,
+	 * mirroring the swap on the data path.
+	 */
+	addr = be32_to_cpu(RSH_BASE_ADDR + (addr | (chan << 16)));
+
+	return rshim_byte_acc_read(dev->pci_dev, addr, result);
+}
+
+/*
+ * RShim register write entry point for the backend ops.  Boot-FIFO
+ * writes go through the holding-register path; everything else goes
+ * through the Byte Access Widget.
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Fold the channel number into the register address. */
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	/* Pre-swap the datum; the helpers emit it as two 32-bit words. */
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		/*
+		 * NOTE(review): the return value of this drain read is
+		 * ignored; it exists only to flush posted writes, and it
+		 * also resets write_count to 0 as a side effect.
+		 */
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+/* kref release callback: deregister the backend and free the state. */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd =
+		container_of(kref, struct rshim_backend, kref);
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/*
+ * Probe routine.
+ *
+ * Fixes vs. the original:
+ *  - rshim_fifo_alloc() was called twice; once is enough.
+ *  - the buffer-allocation, backend-registration and attach-notify
+ *    failure paths left 'err' at 0, so probe reported success on
+ *    failure; each path now sets a proper error code.
+ *  - a failed probe left a dangling pointer in instances[].
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	/* Find a free slot in the instance table. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Reuse an existing backend with the same name, if any. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the fifo (the original code called this twice). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		pr_err("Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	/* Allocate the Rx/Tx buffers if not inherited from a reuse. */
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->read_buf;
+
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->write_buf;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			rshim_unlock();
+			err = retval;
+			goto register_failed;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;
+		goto register_failed;
+	}
+
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	/* Don't leave a dangling pointer in the instance table. */
+	if (instances[index] == dev)
+		instances[index] = NULL;
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 *
+	 * The reset is requested by raising software interrupt SWINT3,
+	 * presumably serviced by the SoC side — TODO(review): confirm
+	 * the SWINT3 handler actually resets those registers.
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	/* If the delayed work already started, wait for it to finish. */
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/*
+	 * NOTE(review): the instances[] slot for this device is never
+	 * cleared here (nor in rshim_pcie_delete), so slots appear to
+	 * leak across remove/probe cycles — verify against rshim.c.
+	 */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* Devices handled by this driver; const per kernel convention. */
+static const struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+/* PCI driver binding for the BlueField livefish rshim interface. */
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module entry point: register with the PCI core. */
+static int __init rshim_pcie_init(void)
+{
+	int rc = pci_register_driver(&rshim_pcie_driver);
+
+	if (rc)
+		pr_err("pci_register failed, error number %d\n", rc);
+
+	return rc;
+}
+
+/* Module exit point: detach from the PCI core. */
+static void __exit rshim_pcie_exit(void)
+{
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-25 15:57     ` Arnd Bergmann
@ 2018-12-04 22:12       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-12-04 22:12 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Just an update that I have uploaded new patch series v6, which includes the other half of the driver that runs on the external USB host machine, and also tries to resolve the previous comments.

The v6 patches could also be found at
https://patchwork.kernel.org/project/linux-arm-kernel/list/?submitter=176699

Thanks!

-----Original Message-----
From: arndbergmann@gmail.com <arndbergmann@gmail.com> On Behalf Of Arnd Bergmann
Sent: Thursday, October 25, 2018 11:58 AM
To: Liming Sun <lsun@mellanox.com>
Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to 
> exchange data with the SoC via USB or PCIe. The driver is based on 
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

I definitely like the idea of using virtio-net and virtio-console here; this is a great way of reusing the existing high-level drivers, and is similar in concept (but also much simpler) to what we have in drivers/misc/mic/ for another Linux-running machine that can be a PCIe add-on card.

Have you also posted the other half of this driver? I'd like to see how it all fits together.

A few style comments:

> +
> +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> +
> +#define TMFIFO_SET_FIELD(reg, mask, value) \
> +	((reg & ~mask) | FIELD_PREP(mask, value))

I think it would be nicer to use FIELD_GET/FIELD_PREP in the code directly, and avoid adding extra wrappers around them.

> +/* Vring size. */
> +#define TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* Use a timer for house-keeping. */
> +static int tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static struct mutex tmfifo_lock;

Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the initialization call.

> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE; 
> +module_param(tmfifo_vring_size, int, 0444); 
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> +
> +struct tmfifo;
> +
> +/* A flag to indicate TmFifo ready. */ static bool tmfifo_ready;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Spin lock. */
> +static DEFINE_SPINLOCK(tmfifo_spin_lock);

Generally speaking, it's nicer to write a driver in a way that avoids global variables and make the flags and locks all members of a device specific structure.

> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};

I suppose you did this to keep the driver simple, but it seems a little inflexible to only support two specific device types. Wouldn't we also want e.g. 9pfs or virtio_blk in some configurations?

> +
> +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
> +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> +
> +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> +	(vdev)->tx_tail += (len); \
> +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \ } while (0)
> +
> +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> +	(vdev)->tx_head += (len); \
> +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \ } while (0)

It would be nicer to turn these into inline functions rather than macros.

> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	int irq[TM_IRQ_CNT];		/* irq numbers */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if
> configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> 0x01};
> +

Is a predefined MAC address better than a random one here?

For DT based systems, we tend to also call of_get_mac_address() in order to allow setting a unique address from firmware.

> +/* Forward declaration. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx); 
> +static void tmfifo_release_pkt(struct virtio_device *vdev,
> +			       struct tmfifo_vring *vring,
> +			       struct vring_desc **desc);

Try to avoid forward declarations by reordering the functions according to how they get called.

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id) {
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}

Maybe using a request_threaded_irq() would be a better way to defer the handler into IRQ context.

        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-12-04 22:12       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2018-12-04 22:12 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Just an update that I have uploaded new patch series v6, which includes the other half of the driver that runs on the external USB host machine, and also tries to resolve the previous comments.

The v6 patches could also be found at
https://patchwork.kernel.org/project/linux-arm-kernel/list/?submitter=176699

Thanks!

-----Original Message-----
From: arndbergmann@gmail.com <arndbergmann@gmail.com> On Behalf Of Arnd Bergmann
Sent: Thursday, October 25, 2018 11:58 AM
To: Liming Sun <lsun@mellanox.com>
Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to 
> exchange data with the SoC via USB or PCIe. The driver is based on 
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

I definitely like the idea of using virtio-net and virtio-console here; this is a great way of reusing the existing high-level drivers, and is similar in concept (but also much simpler) to what we have in drivers/misc/mic/ for another Linux-running machine that can be a PCIe add-on card.

Have you also posted the other half of this driver? I'd like to see how it all fits together.

A few style comments:

> +
> +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> +
> +#define TMFIFO_SET_FIELD(reg, mask, value) \
> +	((reg & ~mask) | FIELD_PREP(mask, value))

I think it would be nicer to use FIELD_GET/FIELD_PREP in the code directly, and avoid adding extra wrappers around them.

> +/* Vring size. */
> +#define TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* Use a timer for house-keeping. */
> +static int tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static struct mutex tmfifo_lock;

Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the initialization call.

> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE; 
> +module_param(tmfifo_vring_size, int, 0444); 
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> +
> +struct tmfifo;
> +
> +/* A flag to indicate TmFifo ready. */ static bool tmfifo_ready;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Spin lock. */
> +static DEFINE_SPINLOCK(tmfifo_spin_lock);

Generally speaking, it's nicer to write a driver in a way that avoids global variables and make the flags and locks all members of a device specific structure.

> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};

I suppose you did this to keep the driver simple, but it seems a little inflexible to only support two specific device types. Wouldn't we also want e.g. 9pfs or virtio_blk in some configurations?

> +
> +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
> +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> +
> +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> +	(vdev)->tx_tail += (len); \
> +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \ } while (0)
> +
> +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> +	(vdev)->tx_head += (len); \
> +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \ } while (0)

It would be nicer to turn these into inline functions rather than macros.

> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	int irq[TM_IRQ_CNT];		/* irq numbers */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if
> configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> 0x01};
> +

Is a predefined MAC address better than a random one here?

For DT based systems, we tend to also call of_get_mac_address() in order to allow setting a unique address from firmware.

> +/* Forward declaration. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx); 
> +static void tmfifo_release_pkt(struct virtio_device *vdev,
> +			       struct tmfifo_vring *vring,
> +			       struct vring_desc **desc);

Try to avoid forward declarations by reordering the functions according to how they get called.

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id) {
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}

Maybe using a request_threaded_irq() would be a better way to defer the handler into IRQ context.

        Arnd
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-11-01 16:23   ` Liming Sun
@ 2018-12-12 23:07     ` Matthias Brugger
  -1 siblings, 0 replies; 179+ messages in thread
From: Matthias Brugger @ 2018-12-12 23:07 UTC (permalink / raw)
  To: Liming Sun, y, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, mbrugger@suse.com >> Matthias Brugger,
	linux-arm-kernel



On 01/11/2018 17:23, Liming Sun wrote:
> This commit adds the TmFifo driver for
Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/soc/Kconfig                |    1 +
>  drivers/soc/Makefile               |    1 +
>  drivers/soc/mellanox/Kconfig       |   18 +
>  drivers/soc/mellanox/Makefile      |    5 +
>  drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
>  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
>  6 files changed, 1337 insertions(+)
>  create mode 100644 drivers/soc/mellanox/Kconfig
>  create mode 100644 drivers/soc/mellanox/Makefile
>  create mode 100644 drivers/soc/mellanox/tmfifo.c
>  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
>

[...]

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> +{
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}
> +
[...]
> +
> +/* Probe the TMFIFO. */
> +static int tmfifo_probe(struct platform_device *pdev)
> +{
> +	u64 ctl;
> +	struct tmfifo *fifo;
> +	struct resource *rx_res, *tx_res;
> +	struct virtio_net_config net_config;
> +	int i, ret;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> +	fifo->timer.function = tmfifo_timer;
> +
> +	for (i = 0; i < TM_IRQ_CNT; i++) {
> +		fifo->irq[i] = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
> +				  "tmfifo", (u8 *)fifo + i);

I think it would be better if you create a struct that passes a pointer to fifo
and the ID instead of "hiding" the ID inside the address.

Regards,
Matthias

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-12-12 23:07     ` Matthias Brugger
  0 siblings, 0 replies; 179+ messages in thread
From: Matthias Brugger @ 2018-12-12 23:07 UTC (permalink / raw)
  To: Liming Sun, y, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, mbrugger@suse.com >> Matthias Brugger,
	linux-arm-kernel



On 01/11/2018 17:23, Liming Sun wrote:
> This commit adds the TmFifo driver for
Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/soc/Kconfig                |    1 +
>  drivers/soc/Makefile               |    1 +
>  drivers/soc/mellanox/Kconfig       |   18 +
>  drivers/soc/mellanox/Makefile      |    5 +
>  drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
>  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
>  6 files changed, 1337 insertions(+)
>  create mode 100644 drivers/soc/mellanox/Kconfig
>  create mode 100644 drivers/soc/mellanox/Makefile
>  create mode 100644 drivers/soc/mellanox/tmfifo.c
>  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
>

[...]

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> +{
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}
> +
[...]
> +
> +/* Probe the TMFIFO. */
> +static int tmfifo_probe(struct platform_device *pdev)
> +{
> +	u64 ctl;
> +	struct tmfifo *fifo;
> +	struct resource *rx_res, *tx_res;
> +	struct virtio_net_config net_config;
> +	int i, ret;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> +	fifo->timer.function = tmfifo_timer;
> +
> +	for (i = 0; i < TM_IRQ_CNT; i++) {
> +		fifo->irq[i] = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
> +				  "tmfifo", (u8 *)fifo + i);

I think it would be better if you create a struct that passes a pointer to fifo
and the ID instead of "hiding" the ID inside the address.

Regards,
Matthias

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1244 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1345 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 446166b..d14555b 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD || COMPILE_TEST
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..2975229
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+struct tmfifo_irq_info {
+	struct tmfifo *fifo;		/* tmfifo structure */
+	int irq;			/* interrupt number */
+	int index;			/* array index */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct tmfifo_irq_info irq_info[TM_IRQ_CNT];	/* irq info */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Return the available Tx buffer space. */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq_info[i].irq;
+		if (irq) {
+			fifo->irq_info[i].irq = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *arg)
+{
+	struct tmfifo_irq_info *irq_info = (struct tmfifo_irq_info *)arg;
+
+	if (irq_info->index < TM_IRQ_CNT &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case an interrupt
+	 * is missed or any leftover bytes are stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and is waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * The console could make a blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq_info[TM_TX_LWM_IRQ].irq) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq_info[TM_RX_HWM_IRQ].irq) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ * Any packet descriptor still held by a ring is returned to the used
+ * ring first, then the virtqueue itself is deleted. The vring memory
+ * (vring->va) is not freed here; that is done by tmfifo_free_vrings().
+ */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested queue on top of the vring memory
+ * that tmfifo_alloc_vrings() allocated earlier. On any failure all
+ * queues created so far are torn down via tmfifo_virtio_del_vqs().
+ * Returns 0 on success or a negative errno.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link vq and vring both ways for the rxtx path. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the virtio status byte cached in the per-device state. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the virtio status byte; only cached locally, no hardware access. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/*
+ * Reset the device. Not much here for now.
+ * Only clears the cached status byte; rings are not drained or reset.
+ */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ * The second half of the bounds check (offset + len < len) guards
+ * against unsigned wrap-around of offset + len.
+ */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ * The second half of the bounds check (offset + len < len) guards
+ * against unsigned wrap-around of offset + len.
+ */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was "virtio_get" (copy-paste); report the correct path. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Virtio config operations.
+ * Installed on every virtual device created by tmfifo_create_vdev().
+ */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * @fifo:     the tmfifo the device attaches to
+ * @vdev_id:  virtio device id (VIRTIO_ID_CONSOLE or VIRTIO_ID_NET)
+ * @features: virtio feature bits to advertise
+ * @config:   optional virtio config space contents (copied); may be NULL
+ * @size:     size of @config in bytes
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+
+	/*
+	 * Publish the vdev before allocating the console Tx buffer so
+	 * tmfifo_free_vrings() can find it on the error paths below.
+	 */
+	fifo->vdev[vdev_id] = tm_vdev;
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		/* The allocation result was previously not checked. */
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vring_cleanup;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	/* Also fixes a tx_buf leak on registration failure. */
+	kfree(tm_vdev->tx_buf);
+vring_cleanup:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo.
+ * Safe to call for an id that was never created (tm_vdev == NULL).
+ * Unregisters the virtio device, frees its vrings and Tx buffer, and
+ * clears the fifo->vdev[] slot. Always returns 0.
+ */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function.
+ * Also used as the error-cleanup path of tmfifo_probe(), which is why
+ * every step is guarded (fifo may be NULL, mappings may not exist yet).
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop rxtx/work processing before tearing anything down. */
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from efi variable.
+ * Reads the "RshimMacAddr" EFI global variable into @mac (6 bytes).
+ * On any failure or size mismatch @mac is left unchanged, so the
+ * caller's default MAC remains in effect.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx FIFO registers, requests the four watermark IRQs,
+ * programs the FIFO watermarks, creates the console and network
+ * virtio devices, and starts the house-keeping timer.
+ *
+ * Error handling: "early_err" is for failures before any mem region is
+ * requested; "err" funnels through tmfifo_remove() to undo everything
+ * acquired so far. Returns 0 on success or a negative errno.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		/*
+		 * Nothing requested yet; going through tmfifo_remove()
+		 * here would release mem regions we never requested.
+		 */
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs the callback; no extra assignment. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+		/* Don't pass a negative irq number to request_irq(). */
+		if (fifo->irq_info[i].irq < 0) {
+			ret = fifo->irq_info[i].irq;
+			fifo->irq_info[i].irq = 0;
+			goto err;
+		}
+		ret = request_irq(fifo->irq_info[i].irq, tmfifo_irq_handler, 0,
+				  "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq_info[i].irq = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base) {
+		/* ret may still be 0 from the last request_irq(). */
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table ("mellanox,bf-tmfifo"). */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table (HID "MLNXBF01"). */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver binding probe/remove to either firmware description. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module init: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+/* Tx FIFO register offsets and field definitions. */
+#define TMFIFO_TX_DATA 0x0
+
+/* Tx status: number of words currently in the FIFO. */
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+/* Tx control: low/high watermarks and total FIFO size. */
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+/* Rx FIFO register offsets and field definitions (mirror the Tx side). */
+#define TMFIFO_RX_DATA 0x0
+
+/* Rx status: number of words currently in the FIFO. */
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+/* Rx control: low/high watermarks and total FIFO size. */
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for the Mellanox BlueField SoC.
TmFifo is a shared FIFO which enables an external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
the virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1244 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1345 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 446166b..d14555b 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..2975229
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size (number of descriptors per ring). */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size (circular buffer; see tmfifo_vdev_tx_buf_*). */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock serializing probe/remove across tmfifo instances. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size (module parameter; read-only after load). */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types; also used as bit positions in tmfifo->pend_events. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+/* Per virtual device (console or network) state. */
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* Cookie passed to request_irq() so the handler can find its fifo. */
+struct tmfifo_irq_info {
+	struct tmfifo *fifo;		/* tmfifo structure */
+	int irq;			/* interrupt number */
+	int index;			/* array index */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct tmfifo_irq_info irq_info[TM_IRQ_CNT];	/* irq info */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+/* 8-byte message header prepended to each packet on the FIFO wire. */
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features (advertised at vdev creation). */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/*
+ * Return the available Tx buffer space.
+ * 8 bytes are always kept in reserve so head == tail unambiguously
+ * means "empty" in the circular buffer.
+ */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer tail after pushing data, wrapping at buffer size. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer head after popping data, wrapping at buffer size. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Allocate vrings for the fifo.
+ *
+ * Allocates DMA-coherent memory for both the Rx and Tx rings of one
+ * virtual device. Returns 0 on success or -ENOMEM on allocation
+ * failure (previously -EINVAL, which misreported the cause); any
+ * rings allocated before the failure are released by the caller's
+ * error path.
+ */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the fifo device.
+ * Releases the DMA-coherent ring memory and deletes any virtqueue
+ * built on top of it. Safe to call with partially-allocated rings
+ * (vring->va NULL entries are skipped).
+ */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/*
+ * Free interrupts of the fifo device.
+ * The dev_id passed to free_irq() must be the same cookie passed to
+ * request_irq() — &fifo->irq_info[i], which is what tmfifo_irq_handler()
+ * casts its argument to. The previous "(u8 *)fifo + i" did not match,
+ * so free_irq() could never find the handler to remove.
+ */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq_info[i].irq;
+		if (irq) {
+			fifo->irq_info[i].irq = 0;
+			disable_irq(irq);
+			free_irq(irq, &fifo->irq_info[i]);
+		}
+	}
+}
+
+/*
+ * Interrupt handler.
+ * Marks the corresponding bit in fifo->pend_events and defers the real
+ * FIFO processing to tmfifo_work_handler(); the test_and_set_bit()
+ * avoids re-queueing the work for an event already pending.
+ */
+static irqreturn_t tmfifo_irq_handler(int irq, void *arg)
+{
+	struct tmfifo_irq_info *irq_info = (struct tmfifo_irq_info *)arg;
+
+	if (irq_info->index < TM_IRQ_CNT &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Device release callback required by the driver core.
+ * Nothing to do: the tmfifo_vdev is freed by tmfifo_delete_vdev().
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ * Returns the head descriptor of the next available chain and advances
+ * the ring's private next_avail cursor, or NULL when the avail ring
+ * holds nothing new.
+ */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Return a descriptor chain to the used ring.
+ *
+ * @vdev: virtio device owning the ring
+ * @vr:   the vring
+ * @desc: head descriptor of the chain being returned
+ * @len:  number of bytes written into the chain's buffers
+ */
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	/* 'id' needs the same endianness conversion as 'len'. */
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/*
+ * Get the total length of a descriptor chain.
+ * Walks the VRING_DESC_F_NEXT links starting at @desc and sums the
+ * buffer lengths of every descriptor in the chain.
+ */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release one packet back to the used ring.
+ *
+ * If a partially-processed packet is pending (@desc and vring->desc_head
+ * both set), that chain is released with the saved pkt_len; otherwise
+ * the next available chain is pulled from the ring and released with
+ * its computed length. @desc (if non-NULL) and vring->pkt_len are
+ * cleared afterwards.
+ */
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * House-keeping timer.
+ * Periodically sets both the Rx and Tx pending bits and kicks the work
+ * handler, then re-arms itself. This is a safety net for lost
+ * interrupts and for data stuck in the FIFO.
+ */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/*
+ * Buffer the console output.
+ *
+ * Drains the console Tx virtqueue into the circular cons->tx_buf:
+ * each packet gets an 8-byte tmfifo_msg_hdr, the payload is copied
+ * (possibly wrapping around the buffer end), the total is padded to
+ * 8-byte alignment, and the descriptor chain is returned to the used
+ * ring. Stops early — releasing the chain unconsumed — when the
+ * buffer has no room for the next packet.
+ */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		/* Write the message header (type + big-endian length). */
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		/* Copy each descriptor's payload, wrapping if needed. */
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * One routine serves both directions; @is_rx selects the Rx FIFO
+ * (true) or the Tx FIFO (false).  Data moves between the vring
+ * descriptors and the FIFO registers one 64-bit word at a time.
+ * Console and network traffic share the same FIFO and are demuxed
+ * by the tmfifo_msg_hdr carried in front of each packet.
+ */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	/* vq->priv was pointed at the backing tmfifo_vring in find_vqs(). */
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	/* Console vdev (may be NULL); used for buffered console Tx below. */
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserve some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			/* Drain the staging buffer one word per iteration. */
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				/* FIFO full: re-read the status registers. */
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			/*
+			 * Claim this FIFO direction until the whole packet
+			 * has been transferred.
+			 */
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Always returns true; the "kick" is only used to schedule the
+ * worker (or, for console Tx, to drain the ring synchronously).
+ */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Run Rx or Tx processing for every created vdev on this fifo. */
+static void tmfifo_rxtx_all(struct tmfifo *fifo, bool is_rx)
+{
+	int i, queue = is_rx ? TMFIFO_VRING_RX : TMFIFO_VRING_TX;
+	struct tmfifo_vdev *tm_vdev;
+
+	for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev != NULL)
+			tmfifo_virtio_rxtx(tm_vdev->vrings[queue].vq, is_rx);
+	}
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.
+ *
+ * Runs under fifo->lock so that Rx/Tx processing is serialized with
+ * vdev creation/deletion, which takes the same mutex.
+ */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+	    fifo->irq_info[TM_TX_LWM_IRQ].irq)
+		tmfifo_rxtx_all(fifo, false);
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+	    fifo->irq_info[TM_RX_HWM_IRQ].irq)
+		tmfifo_rxtx_all(fifo, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits advertised for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	return tm_vdev->features;
+}
+
+/* Record the feature bits the virtio core settled on. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+	struct tmfifo_vring *vring;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Drop any packet still in flight on this ring. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		/* Tear down the virtqueue if one was created. */
+		if (vring->vq != NULL) {
+			struct virtqueue *vq = vring->vq;
+
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * The backing vring memory (va/size/align) appears to be allocated
+ * earlier by tmfifo_alloc_vrings() at vdev-creation time; here it is
+ * only zeroed and wrapped in a virtqueue.  NOTE(review): confirm
+ * against tmfifo_alloc_vrings(), which is outside this hunk.
+ */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		/* Back-pointer so the notify callback can find the vring. */
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct tmfifo_vdev, vdev)->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = status;
+}
+
+/* Reset the device. Not much here for now: just clear the status. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct tmfifo_vdev, vdev)->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev;
+	u8 *cfg;
+
+	tm_vdev = container_of(vdev, struct tmfifo_vdev, vdev);
+	cfg = (u8 *)&tm_vdev->config;
+
+	/* Reject accesses that wrap around or run past the config space. */
+	if (offset + len < len || offset + len > sizeof(tm_vdev->config)) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, cfg + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	/* Reject accesses that wrap around or run past the config space. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* Was "virtio_get" (copy-paste); this is the set path. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Virtio config operations.
+ *
+ * This ops table is plugged into every virtio device created on the
+ * tmfifo (see tmfifo_create_vdev()).
+ */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * @fifo: tmfifo instance to attach the vdev to.
+ * @vdev_id: virtio device id (VIRTIO_ID_CONSOLE or VIRTIO_ID_NET).
+ * @features: virtio feature bits to advertise.
+ * @config: optional device config blob copied into the vdev (may be NULL).
+ * @size: size of @config in bytes.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto unlock_fail;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto unlock_fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/* The console needs a staging buffer for buffered Tx output. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		/* Was unchecked; fail cleanly if allocation fails. */
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vring_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	fifo->vdev[vdev_id] = NULL;
+	kfree(tm_vdev->tx_buf);	/* was leaked on this path */
+vring_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+vdev_fail:
+	kfree(tm_vdev);
+unlock_fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		/* Unregister first, then free the backing resources. */
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function.
+ *
+ * Also invoked from the probe error path (see tmfifo_probe()), so it
+ * must tolerate partially-initialized state.
+ */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the worker from doing any further processing. */
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	/*
+	 * The mem regions are requested in probe before the fifo struct
+	 * is allocated, so release them even when fifo is NULL.
+	 */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr"
+ * EFI variable.  The caller's default MAC in @mac is left untouched
+ * unless a full 6-byte address is successfully read.
+ */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	/* EFI runtime services may be unavailable (non-UEFI boot). */
+	if (!efi_enabled(EFI_RUNTIME_SERVICES))
+		return;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx register blocks, requests the watermark interrupts,
+ * programs the FIFO watermarks, and creates the console and network
+ * virtio devices on top of the shared FIFO.
+ */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, irq, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/*
+		 * Nothing requested yet: don't go through tmfifo_remove(),
+		 * which would release mem regions that were never requested.
+		 */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	/* Init early: the error path (tmfifo_remove) takes this mutex. */
+	mutex_init(&fifo->lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	/* timer_setup() already installs the callback. */
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = irq;
+			goto err;
+		}
+		fifo->irq_info[i].irq = irq;
+		ret = request_irq(irq, tmfifo_irq_handler, 0,
+				  "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq_info[i].irq = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base) {
+		ret = -ENOMEM;	/* ret was 0 here in the old code */
+		goto err;
+	}
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	/* tmfifo_remove() undoes all of the partial setup above. */
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+/* ACPI match table. */
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+/* Platform driver glue: binds via either DT or ACPI. */
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+/* Module initialization: register the platform driver. */
+static int __init tmfifo_init(void)
+{
+	int ret = platform_driver_register(&tmfifo_driver);
+
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+	return ret;
+}
+
+/* Module cleanup: unregister the platform driver. */
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+/* Module metadata. */
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+/* Tx FIFO data register: 64-bit words written here enter the FIFO. */
+#define TMFIFO_TX_DATA 0x0
+
+/* Tx FIFO status: COUNT is the current FIFO occupancy in words. */
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+/*
+ * Tx FIFO control: LWM/HWM are the low/high watermarks and
+ * MAX_ENTRIES is the FIFO depth in 64-bit words.
+ */
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+/* Rx FIFO data register: reads pop one 64-bit word from the FIFO. */
+#define TMFIFO_RX_DATA 0x0
+
+/* Rx FIFO status: COUNT is the number of words available to read. */
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+/*
+ * Rx FIFO control: same layout as the Tx control register
+ * (LWM/HWM watermarks, MAX_ENTRIES depth).
+ */
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 51bc479..a9b10a0 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -119,6 +119,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index c9a57d1..85dbef0 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 51bc479..a9b10a0 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -119,6 +119,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index c9a57d1..85dbef0 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41 42 43 44>;
+};
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f063443..bb2ea86 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1759,6 +1759,14 @@ S:	Maintained
 F:	drivers/phy/mediatek/
 F:	Documentation/devicetree/bindings/phy/phy-mtk-*
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f063443..bb2ea86 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1759,6 +1759,14 @@ S:	Maintained
 F:	drivers/phy/mediatek/
 F:	Documentation/devicetree/bindings/phy/phy-mtk-*
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC via an
interface called Rshim. The Rshim driver provides boot, console,
and networking services over this interface. This commit is
the common driver which the other backend (transport) drivers will
use.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig           |    8 +
 drivers/soc/mellanox/Makefile          |    1 +
 drivers/soc/mellanox/host/Makefile     |    2 +
 drivers/soc/mellanox/host/rshim.c      | 2673 ++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h      |  361 +++++
 drivers/soc/mellanox/host/rshim_regs.h |  152 ++
 6 files changed, 3197 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellanox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..e47842f
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * Convert the word from little endian after reading it from
+		 * RShim. The sender is assumed to have encoded it as little
+		 * endian, which is usually the default case.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;
+			mutex_lock(&bd->mutex);
+		}
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		char *buf = bd->boot_buf[whichbuf];
+
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have some sort of bug
+	     * which makes it return more bytes than we actually wrote!  In
+	     * that case we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+		bd->is_boot_open = 0;
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	while (devs_locked) {
+		int i = __builtin_ctz(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Signal an error on the FIFO, and wake up anyone who might need to know
+ * about it.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int i;
+
+	bd->tmfifo_error = err;
+	wake_up_interruptible_all(&bd->write_completed);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		wake_up_interruptible_all(&bd->read_fifo[i].operable);
+		wake_up_interruptible_all(&bd->write_fifo[i].operable);
+	}
+}
+
+/* Drain the read buffer, and start another read/interrupt if needed. */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with RCU lock. See comments in
+			 * rshim_register_service() / rshim_deregister_service()
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+/**
+ * rshim_fifo_read() - copy up to @count bytes out of a channel's read FIFO.
+ * @bd: backend device.
+ * @buffer: destination buffer (kernel or user space, per @to_user).
+ * @count: number of bytes requested.
+ * @chan: TMFIFO channel index (console or network).
+ * @nonblock: if true, return -EAGAIN rather than sleeping on an empty FIFO.
+ * @to_user: if true, copy with copy_to_user() instead of memcpy().
+ *
+ * Returns the number of bytes transferred; if nothing was transferred,
+ * returns a negative errno (-ENODEV, the saved tmfifo error, -EAGAIN,
+ * -EFAULT, or -EINTR/-ERESTARTSYS on signal).  Holds bd->mutex across the
+ * loop, dropping it while sleeping for more data.
+ */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				/* Nonblocking and empty: kick the drainer
+				 * once so data can show up for next time.
+				 */
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				return to_user ? -EINTR : -ERESTARTSYS;
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		/* pass1/pass2 split covers wrap-around of the ring buffer. */
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+/*
+ * rshim_fifo_output() - drain the per-channel write FIFOs into
+ * bd->write_buf and push it to the backend via bd->write().
+ *
+ * Channels are serviced round-robin; console data is a byte stream, so a
+ * message header is synthesized for it, while network packets already
+ * carry one.  Each packet is padded to an 8-byte boundary in the output
+ * buffer.  Data is dropped while the boot device is open.
+ * NOTE(review): callers visible in this file invoke this under
+ * bd->spinlock - confirm that holds for all call sites.
+ */
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				/*
+				 * The packet header may wrap around the ring;
+				 * reassemble it into bd->msg_hdr if so.
+				 */
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			/* pass1/pass2 split covers ring wrap-around. */
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+/**
+ * rshim_fifo_alloc() - allocate the per-channel read/write FIFO buffers.
+ * @bd: backend device.
+ *
+ * Buffers that are already allocated are kept as-is.  Returns 0 on
+ * success, non-zero if any allocation failed; on failure the buffers
+ * that did allocate are left in place (rshim_fifo_free() releases them).
+ */
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		/* Test the pointer directly instead of comparing to 0. */
+		allocfail |= !bd->read_fifo[i].data;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= !bd->write_fifo[i].data;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+/* Return all TmFifo software state to its initial, empty condition. */
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int chan;
+
+	/* Reset the staging-buffer bookkeeping. */
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+	bd->rx_chan = 0;
+	bd->tx_chan = 0;
+
+	/* Clear the in-flight flags and drain every channel under the lock. */
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_WRITING |
+			    RSH_SFLG_READING);
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		read_reset(bd, chan);
+		write_reset(bd, chan);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+/*
+ * Release every per-channel FIFO buffer, reset the FIFO state, and mark
+ * the TmFifo as detached.
+ */
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int chan;
+
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		kfree(bd->read_fifo[chan].data);
+		bd->read_fifo[chan].data = NULL;
+		kfree(bd->write_fifo[chan].data);
+		bd->write_fifo[chan].data = NULL;
+	}
+
+	rshim_fifo_reset(bd);
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+/**
+ * rshim_fifo_write() - copy up to @count bytes into a channel's write FIFO.
+ * @bd: backend device.
+ * @buffer: source buffer (kernel or user space, per @from_user).
+ * @count: number of bytes to write.
+ * @chan: TMFIFO channel index (console or network).
+ * @nonblock: if true, return -EAGAIN rather than sleeping on a full FIFO.
+ * @from_user: if true, copy with copy_from_user() instead of memcpy().
+ *
+ * Returns the number of bytes queued; if nothing was queued, returns a
+ * negative errno (-ENODEV, the saved tmfifo error, -EAGAIN, -EFAULT, or
+ * -ERESTARTSYS on signal).  Holds bd->mutex across the loop, dropping it
+ * while sleeping for FIFO space.
+ */
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		/* pass1/pass2 split covers wrap-around of the ring buffer. */
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+/*
+ * rshim_fifo_fsync() - block until channel @chan's write FIFO has drained
+ * and no write is in flight.  Returns 0, or -ERESTARTSYS on signal.
+ * @start/@end/@datasync are accepted for the fsync prototype but unused.
+ */
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * rshim_fifo_poll() - poll handler shared by the console and network
+ * nodes.  Reports readable/writable state of channel @chan, and POLLERR
+ * on a saved tmfifo error (console channel excepted, see below).
+ */
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+
+	if (!read_empty(bd, chan))
+		retval |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		retval |= POLLOUT | POLLWRNORM;
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		retval |= POLLERR;
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, retval);
+
+	return retval;
+}
+
+
+/*
+ * rshim_fifo_release() - release handler shared by the console and
+ * network nodes.  Drops the per-open console reference, clears the
+ * open/reading flags when the last user goes away, and releases the
+ * backend kref/module reference taken at open time.
+ */
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	/* Last user of the TmFifo gone: cancel pending I/O. */
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* Drop the references taken in rshim_open(). */
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+/* Read from the network channel on behalf of userspace. */
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+/* Write to the network channel on behalf of userspace. */
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+/* Flush the network channel. */
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_NET_CHAN);
+}
+
+/* Poll the network channel. */
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+}
+
+/* Release the network channel. */
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_NET_CHAN);
+}
+
+/* File operations installed by rshim_tmfifo_open(). */
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+/*
+ * Open handler for the TmFifo network node.  Only a single opener is
+ * allowed; on success the drainer is kicked so pending input is fetched.
+ */
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_tm_open) {
+		pr_debug("tmfifo_open: file already open\n");
+		retval = -EBUSY;
+	} else {
+		bd->is_tm_open = 1;
+
+		/* Call the drainer to do an initial read, if needed. */
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+/* Delayed-work handler and console file operations routines */
+
+/*
+ * rshim_work_handler() - delayed-work callback that services keepalive
+ * updates, deferred boot-stream writes, deferred FIFO writes, and
+ * console FIFO input/output.  Re-arms the housekeeping timer while the
+ * console is open on backends without reprobe support.
+ */
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	/* Refresh the keepalive magic so the other side sees us alive. */
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	/* Complete a deferred boot-stream write, if one is pending. */
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		/*
+		 * NOTE(review): plain spin_lock() here while every other
+		 * path in this file takes bd->spinlock with
+		 * spin_lock_irq() - confirm the lock is never taken from
+		 * irq context, otherwise this could deadlock.
+		 */
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	/* Keep polling the console via the timer (~100ms). */
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+/* Read from the console channel on behalf of userspace. */
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+/* Write to the console channel on behalf of userspace. */
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+/* Flush the console channel. */
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_CONS_CHAN);
+}
+
+/*
+ * ioctl handler for the console node: supports only the termios
+ * get/set commands (TCGETS/TCSETS*), operating on the cached
+ * bd->cons_termios.  The TCGETS2 #ifdef selects the termios conversion
+ * helper matching the kernel's termios ABI.
+ */
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+/* Poll the console channel. */
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+}
+
+/* Release the console channel. */
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_CONS_CHAN);
+}
+
+/* File operations installed by rshim_console_open(). */
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+/*
+ * Open handler for the console node.  Multiple opens are allowed and
+ * reference-counted; the first open sets the CONS_OPEN flag and queues
+ * the worker so console traffic starts flowing.
+ */
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * The console is already open.  This is OK, but it means
+		 * there's no work to do other than updating the reference
+		 * count.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	bd->is_cons_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+
+	spin_unlock_irq(&bd->spinlock);
+
+	/* First opener: start the console worker (~100ms). */
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * rshim_boot_done() - transition out of boot mode once both the rshim
+ * registers and the TmFifo are available: clear stale errors, wake any
+ * waiters on the boot/reset completions, and restart the console worker
+ * if the console is open.  Always returns 0.
+ */
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		}	/* Note: stray ';' after this brace removed. */
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+/*
+ * Read one 8-byte rshim register.  The file position encodes the target:
+ * bits [19:16] select the channel, bits [15:0] the register address.
+ */
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	u64 value;
+	int rc;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	rc = bd->read_rshim(bd,
+			    (*ppos >> 16) & 0xF, /* channel # */
+			    *ppos & 0xFFFF,	 /* addr */
+			    &value);
+	mutex_unlock(&bd->mutex);
+
+	if (rc)
+		return rc;
+
+	/* Hand the register value back to userspace. */
+	if (copy_to_user(user_buffer, &value, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Write one 8-byte rshim register.  The file position encodes the target:
+ * bits [19:16] select the channel, bits [15:0] the register address.
+ */
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	u64 value;
+	int rc;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Fetch the register value from userspace. */
+	if (copy_from_user(&value, user_buffer, count))
+		return -EFAULT;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	rc = bd->write_rshim(bd,
+			     (*ppos >> 16) & 0xF, /* channel # */
+			     *ppos & 0xFFFF, /* addr */
+			     value);
+	mutex_unlock(&bd->mutex);
+
+	return rc ? rc : count;
+}
+
+/* Release the rshim register node: drop the refs taken in rshim_open(). */
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations installed by rshim_rshim_open(). */
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+/* Open handler for the rshim register node. */
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+/*
+ * seq_file show routine for the misc node: prints the boot mode read
+ * from RSH_BOOT_CONTROL, a constant SW_RESET flag, and the backend
+ * driver name.
+ */
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	int retval;
+	u64 value;
+
+	/* Boot mode. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				&value);
+	if (retval) {
+		pr_err("couldn't read rshim register\n");
+		return retval;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+/*
+ * Write handler for the misc node.  Accepts "KEY VALUE" pairs:
+ *   "BOOT_MODE <hex>" sets RSH_BOOT_CONTROL's boot-mode field;
+ *   "SW_RESET <nonzero hex>" triggers a chip reset (with a TmFifo
+ *   detach/reset/reattach cycle when the backend has no reprobe).
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+	/* Userspace need not NUL-terminate; do it before parsing. */
+	buf[count] = '\0';
+
+	/* Bound the %s conversion so it cannot overflow key[]. */
+	if (sscanf(buf, "%31s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+/*
+ * Release the misc node: tear down the seq_file and drop the refs taken
+ * in rshim_open().
+ */
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations installed by rshim_misc_open(). */
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+/* Open handler for the misc node; turns the file into a seq_file. */
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+/*
+ * rshim_open() - top-level open for every rshim minor.  The minor number
+ * selects the backend device (minor / RSH_DEV_TYPES) and the node type
+ * (minor % RSH_DEV_TYPES); a kref and module reference are taken and
+ * dropped again if the per-type open fails.
+ */
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+/* Shared fops: each per-type open installs its own specialized fops. */
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+/*
+ * rshim_tmfifo_sync() - push a batch of zero-length VIRTIO_ID_NET header
+ * words into the host-to-tile FIFO, bounded by the FIFO's current free
+ * space and TMFIFO_MAX_SYNC_WORDS.
+ *
+ * NOTE(review): a failing write_rshim() only breaks the loop - the
+ * function still returns 0.  The words are also written to the
+ * RSH_TM_HOST_TO_TILE_STS register rather than a data register.
+ * Confirm both behaviors are intentional.
+ */
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_STS, hdr.data);
+		if (retval < 0)
+			break;
+	}
+
+	return 0;
+}
+
+/**
+ * rshim_notify() - dispatch a backend event.
+ * @bd: backend device.
+ * @event: one of the RSH_EVENT_* codes.
+ * @code: event-specific argument (error code for RSH_EVENT_FIFO_ERR).
+ *
+ * ATTACH runs boot completion, syncs the tmfifo when needed, and calls
+ * each registered service's create hook under RCU; DETACH calls each
+ * service's delete hook outside the RCU read section (see comment
+ * below).  Returns 0, or the first service-create error.
+ */
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+/*
+ * Map a device name to a slot index, preferring (1) the slot previously
+ * used by the same name, then (2) a never-used slot, then (3) any slot
+ * with no active device.  Returns -1 when no slot is available.
+ */
+static int rshim_find_index(char *dev_name)
+{
+	int slot;
+
+	/* Pass 1: exact match with a previously seen device name. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (rshim_dev_names[slot] &&
+		    !strcmp(dev_name, rshim_dev_names[slot])) {
+			pr_debug("found match with previous at index %d\n",
+				 slot);
+			return slot;
+		}
+	}
+
+	/* Pass 2: a slot whose name has never been assigned. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (!rshim_dev_names[slot]) {
+			pr_debug("found never-used slot %d\n", slot);
+			return slot;
+		}
+	}
+
+	/* Pass 3: a slot with no currently-registered device. */
+	for (slot = 0; slot < rshim_nr_devs; slot++) {
+		if (!rshim_devs[slot]) {
+			pr_debug("found unused slot %d\n", slot);
+			return slot;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Look up the backend occupying the slot that rshim_find_index() picks
+ * for @dev_name; NULL when no slot is available (or the slot is empty).
+ */
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int slot = rshim_find_index(dev_name);
+
+	if (slot < 0) {
+		/* Every slot is taken; nothing we can do. */
+		pr_err("couldn't find slot for new device %s\n", dev_name);
+		return NULL;
+	}
+
+	return rshim_devs[slot];
+}
+EXPORT_SYMBOL(rshim_find);
+
+/* House-keeping timer. */
+/*
+ * Periodic timer: re-queues the worker when console work is pending,
+ * and requests a keepalive refresh once per rshim_keepalive_period.
+ */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+/* sysfs "rshim_path" attribute: reports this backend's device name. */
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	if (bd == NULL)
+		return -ENODEV;
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			rshim_dev_names[bd->dev_index]);
+}
+
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+/*
+ * Deferred loader for dependent service modules; requesting them from
+ * delayed work keeps module loading off the registration path.
+ */
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/*
+ * Check whether backend is allowed to register or not.  The check
+ * fails if another driver already holds the target: an attached driver
+ * keeps rewriting the keepalive magic into RSH_SCRATCHPAD1, so zeroing
+ * the register and polling detects it.
+ */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	int i, retval;
+	u64 value;
+
+	/* Write value 0 to RSH_SCRATCHPAD1. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (retval < 0)
+		return -ENODEV;
+
+	/*
+	 * Poll RSH_SCRATCHPAD1 up to one second to check whether it's reset to
+	 * the keepalive magic value, which indicates another backend driver has
+	 * already attached to this target.
+	 */
+	for (i = 0; i < 10; i++) {
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+					&value);
+		if (retval < 0)
+			return -ENODEV;
+
+		if (value == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+/*
+ * rshim_register() - register a backend (USB, PCIe, ...) with the
+ * common rshim framework.
+ *
+ * Verifies exclusive access to the target, initializes all per-backend
+ * state, creates the character/class devices for each sub-device type
+ * and starts the house-keeping timer.  Returns 0 on success or a
+ * negative errno.  Safe to call on an already-registered backend
+ * (returns 0 without side effects).
+ */
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	/* Honor the optional "backend_driver" module-parameter filter. */
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	/* Make sure no other driver has already claimed this target. */
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	bd->dev_index = dev_index;
+	/* Record the device name; free a stale name left in this slot. */
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+			/*
+			 * cl_dev is an error pointer here; it must not be
+			 * passed to dev_set_drvdata()/device_create_file().
+			 */
+			continue;
+		}
+		pr_debug("added class dev %s\n",
+			 format_dev_t(devbuf, rshim_dev_base +
+				      bd->dev_index *
+				      RSH_DEV_TYPES + i));
+
+		dev_set_drvdata(cl_dev, bd);
+		if (device_create_file(cl_dev, &dev_attr_rshim_path))
+			pr_err("could not create rshim_path file in sysfs\n");
+	}
+
+	/* Allocate both boot buffers, or neither of them. */
+	for (i = 0; i < 2; i++) {
+		bd->boot_buf[i] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+		if (!bd->boot_buf[i]) {
+			while (--i >= 0) {
+				kfree(bd->boot_buf[i]);
+				bd->boot_buf[i] = NULL;
+			}
+			break;
+		}
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	/* Load dependent service modules (e.g. rshim_net) shortly after. */
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+
+/*
+ * rshim_deregister() - detach a backend previously registered with
+ * rshim_register().
+ *
+ * Stops the house-keeping timer, frees the boot buffers and removes
+ * the character/class devices.  The recorded device name is kept in
+ * rshim_dev_names[] so a re-attached device can reclaim the same
+ * index (see rshim_find_index()); only rshim_devs[] is cleared.
+ */
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the timer. */
+	del_timer_sync(&bd->timer);
+
+	for (i = 0; i < 2; i++)
+		kfree(bd->boot_buf[i]);
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class,
+			       rshim_dev_base + bd->dev_index *
+			       RSH_DEV_TYPES + i);
+	}
+
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+/*
+ * rshim_register_service() - install a service (e.g. networking) and
+ * attach it to every registered backend.
+ *
+ * A private copy of @service is published in rshim_svc[] under RCU.
+ * Returns 0 on success, -EEXIST if a service of the same type is
+ * already installed, -ENOMEM on allocation failure, or the first
+ * error returned by a backend's create() callback.
+ */
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add memory barrier to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_unlock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+/*
+ * rshim_deregister_service() - remove a service installed via
+ * rshim_register_service() and free the private copy once all RCU
+ * readers and outstanding references have drained.
+ */
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+/*
+ * Module initialization: create the "rsh" device class, reserve a
+ * char-dev region (RSH_DEV_TYPES minors per device), allocate the
+ * global device tables and create the worker queue.
+ */
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	/*
+	 * Use kcalloc() for both tables: zeroed like kzalloc(), but with
+	 * an overflow-checked element-count multiplication.
+	 */
+	rshim_dev_names = kcalloc(rshim_nr_devs, sizeof(rshim_dev_names[0]),
+				  GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	/* rshim_dev_base is only non-zero once the region was reserved. */
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+/*
+ * Module cleanup: undo everything rshim_init() set up.  Backends are
+ * presumed to have deregistered by now — NOTE(review): confirm module
+ * reference counting guarantees this before unload.
+ */
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	/* Make sure the deferred module loader is no longer queued. */
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+/* Module entry/exit points and metadata. */
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types.  Each backend exposes one minor per type; the
+ * final enumerator is the per-device minor count.
+ */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX			/* number of service types */
+};
+
+/* TMFIFO message header.  The header travels as a single 64-bit FIFO
+ * word; the struct and 'data' members are two views of that word.
+ */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length (big-endian) */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;			/* raw 64-bit view of the header */
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/* FIFO structure.  head/tail are used with the circular-buffer helpers
+ * (CIRC_CNT etc.), so the buffer size must be a power of two; waiters
+ * block on 'operable' until the FIFO can make progress.
+ */
+struct rshim_fifo {
+	unsigned char *data;
+	unsigned int head;
+	unsigned int tail;
+	wait_queue_head_t operable;
+};
+
+/* RShim backend.  One instance per attached device; transport drivers
+ * (USB, PCIe, ...) fill in the API callbacks at the bottom and call
+ * rshim_register().
+ */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Pointer to the net device (opaque to the common driver). */
+	void *net;
+
+	/* House-keeping timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 */
+	struct kref kref;
+
+	/* State flags, protected by the mutex below. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+#ifdef RSH_RESET_MUTEX
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/* RShim service (e.g. networking), installed via
+ * rshim_register_service().
+ */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/* Reference number.  Must drop to zero before the service copy
+	 * is freed (see rshim_deregister_service()).
+	 */
+	atomic_t ref;
+
+	/* Create service.  Called under the rshim lock for each backend;
+	 * -EEXIST returns are tolerated.
+	 */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global array to store RShim devices and names. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..b14df716
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#define _64bit(x) x ## ULL
+#endif /* __ASSEMBLER */
+
+#include <linux/types.h>
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__RSHIM_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC via an
interface called Rshim. The Rshim driver provides boot, console,
and networking services over this interface. This commit adds
the common driver which the other backend (transport) drivers
will use.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig           |    8 +
 drivers/soc/mellanox/Makefile          |    1 +
 drivers/soc/mellanox/host/Makefile     |    2 +
 drivers/soc/mellanox/host/rshim.c      | 2673 ++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h      |  361 +++++
 drivers/soc/mellanox/host/rshim_regs.h |  152 ++
 6 files changed, 3197 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellanox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..e47842f
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/* Note: CONS_RSHIM_T2H_DONE_MASK ("Tile is done mask") is defined once */
+/* above; a duplicate, identical definition was removed from here. */
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * Convert from little endian after reading from RShim. The
+		 * sender encodes the data as little endian, which is
+		 * usually the default case.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;
+			mutex_lock(&bd->mutex);
+		}
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		char *buf = bd->boot_buf[whichbuf];
+
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have some sort of bug
+	     * which makes it return more bytes than we actually wrote!  In
+	     * that case we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+		bd->is_boot_open = 0;
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	while (devs_locked) {
+		int i = __builtin_ctzl(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Signal an error on the FIFO, and wake up anyone who might need to know
+ * about it.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int i;
+
+	bd->tmfifo_error = err;
+	wake_up_interruptible_all(&bd->write_completed);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		wake_up_interruptible_all(&bd->read_fifo[i].operable);
+		wake_up_interruptible_all(&bd->write_fifo[i].operable);
+	}
+}
+
+/* Drain the read buffer, and start another read/interrupt if needed. */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with RCU lock. See comments in
+			 * rshim_register_service() / rshim_deregister_service()
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				return to_user ? -EINTR : -ERESTARTSYS;
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+/*
+ * Drain the per-channel write FIFOs into bd->write_buf and, unless the
+ * boot device is open, hand the result to the backend via bd->write().
+ * Console data is a raw byte stream, so a tmfifo message header is
+ * synthesized for it; network packets already include their header.
+ * Channels are serviced round-robin, resuming a partially-sent packet
+ * first.  Callers serialize access (rshim_fifo_write() and the work
+ * handler invoke this under bd->spinlock).
+ */
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				/*
+				 * The packet header may wrap around the end
+				 * of the circular FIFO; reassemble it if so.
+				 */
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			/* Bytes still to emit for this packet, incl. header. */
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			/* pass1 runs to the FIFO end; pass2 is the wrap. */
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+/*
+ * Allocate the read and write FIFO buffers for every channel.
+ * Already-allocated buffers are kept as-is, so the call is idempotent.
+ * Returns 0 on success, non-zero if any allocation failed; partially
+ * allocated buffers are released by rshim_fifo_free().
+ */
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= !bd->read_fifo[i].data;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= !bd->write_fifo[i].data;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+/* Reset all TmFifo state: packet bookkeeping, flags, per-channel FIFOs. */
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int chan;
+
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+	bd->rx_chan = 0;
+	bd->tx_chan = 0;
+
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_WRITING | RSH_SFLG_READING);
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		read_reset(bd, chan);
+		write_reset(bd, chan);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+/* Free every per-channel FIFO buffer and mark the TmFifo as gone. */
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int chan;
+
+	for (chan = 0; chan < TMFIFO_MAX_CHAN; chan++) {
+		kfree(bd->read_fifo[chan].data);
+		kfree(bd->write_fifo[chan].data);
+		bd->read_fifo[chan].data = NULL;
+		bd->write_fifo[chan].data = NULL;
+	}
+
+	rshim_fifo_reset(bd);
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+/*
+ * rshim_fifo_write() - queue bytes onto a channel's write FIFO.
+ * @bd: backend instance.
+ * @buffer: source data (user or kernel pointer, per @from_user).
+ * @count: number of bytes to write.
+ * @chan: destination channel index (console or network).
+ * @nonblock: if true, return -EAGAIN instead of sleeping when full.
+ * @from_user: if true, @buffer is a userspace pointer.
+ *
+ * Returns the number of bytes accepted; if nothing was accepted yet,
+ * returns -ENODEV (device gone), the pending tmfifo error, -EAGAIN,
+ * -ERESTARTSYS (signal while waiting) or -EFAULT (bad user buffer).
+ * Serialized by bd->mutex; FIFO indices are updated under bd->spinlock.
+ */
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			/* Drop the mutex while sleeping for space. */
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		/* pass1 runs to the end of the circular FIFO; pass2 wraps. */
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+/*
+ * Flush a channel: block until the channel's write FIFO is empty, then
+ * until no backend write is outstanding (RSH_SFLG_WRITING clear).
+ * Returns 0, or -ERESTARTSYS if interrupted by a signal.
+ * @start/@end/@datasync are required by the fsync prototype but unused.
+ */
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Poll support shared by the console and network devices. */
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int mask = 0;
+
+	mutex_lock(&bd->mutex);
+
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+	if (!read_empty(bd, chan))
+		mask |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		mask |= POLLOUT | POLLWRNORM;
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		mask |= POLLERR;
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, mask);
+
+	return mask;
+}
+
+
+/*
+ * Common release path for the console and network devices.  Drops the
+ * console open count, tears down console state when the last console
+ * user leaves, cancels backend reads once neither device is open, and
+ * drops the module/kref references taken in rshim_open().
+ */
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	/* Mark the per-device open flag clear for this minor. */
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	/* Last user gone: stop any outstanding backend read. */
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+/* read() on the network device: pull from the NET channel. */
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			       nonblock, true);
+}
+
+/* write() on the network device: push onto the NET channel. */
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_NET_CHAN,
+				nonblock, true);
+}
+
+/* fsync() on the network device: flush the NET channel. */
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_NET_CHAN);
+}
+
+/* poll() on the network device: delegate to the shared FIFO poll. */
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+}
+
+/* release() on the network device: common FIFO release for NET. */
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_NET_CHAN);
+}
+
+/* File operations for the tmfifo network device minor. */
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+/*
+ * Open the network device: single-open only.  Installs the tmfifo file
+ * operations and kicks an initial drain of any pending input.
+ */
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_tm_open) {
+		pr_debug("tmfifo_open: file already open\n");
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+	bd->is_tm_open = 1;
+
+	/* Call the drainer to do an initial read, if needed. */
+	spin_lock_irq(&bd->spinlock);
+	rshim_fifo_input(bd);
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Console file operations routines */
+
+/*
+ * Deferred work handler: writes the keepalive magic when requested,
+ * completes any delayed boot-stream write, finishes a delayed fifo
+ * write, then runs console/fifo output and input processing.  While
+ * the console is open on a backend without reprobe support, it keeps
+ * re-arming the housekeeping timer.
+ */
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	/* Keepalive requested by the timer: refresh the scratchpad. */
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	/* Complete a pending delayed boot-stream write. */
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		/*
+		 * NOTE(review): plain spin_lock() here while other paths use
+		 * spin_lock_irq() on bd->spinlock -- confirm this cannot be
+		 * interrupted by an IRQ-context user of the same lock.
+		 */
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	/* Poll the console every ~100ms while it stays open. */
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+/* read() on the console device: pull from the CONS channel. */
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			       nonblock, true);
+}
+
+/* write() on the console device: push onto the CONS channel. */
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	bool nonblock = (file->f_flags & O_NONBLOCK) != 0;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+				nonblock, true);
+}
+
+/* fsync() on the console device: flush the CONS channel. */
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_CONS_CHAN);
+}
+
+/*
+ * Console ioctl: supports the termios get/set commands only, backed by
+ * the cached bd->cons_termios.  TCSETSW and TCSETSF are handled the
+ * same as TCSETS (no drain or flush is performed here).  Any other
+ * command returns -EINVAL.
+ */
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+/* Pick the termios copy helper matching the kernel's termios ABI. */
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+/* poll() on the console device: delegate to the shared FIFO poll. */
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+}
+
+/* release() on the console device: common FIFO release for CONS. */
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_CONS_CHAN);
+}
+
+/* File operations for the console device minor. */
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+/*
+ * Open the console device.  Multiple opens are allowed and tracked in
+ * bd->console_opens; the first open sets the CONS_OPEN spin flag and
+ * queues the console worker.  Always returns 0.
+ */
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * The console is already open.  This is OK, but it means
+		 * there's no work to do other than updating the reference
+		 * count.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	bd->is_cons_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Start the console worker if it isn't already pending. */
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/*
+ * Called once the target is up (rshim and TmFifo both present): clear
+ * any stale FIFO error, wake anyone waiting on boot completion, and
+ * restart the console worker if the console is open.  Always returns 0.
+ */
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		}
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+/*
+ * read() on the register-access device: read one 8-byte rshim register.
+ * The file offset encodes the target: bits 16-19 select the channel,
+ * bits 0-15 the register address.
+ */
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	u64 value;
+	int rc;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	mutex_lock(&bd->mutex);
+	rc = bd->read_rshim(bd,
+			    (*ppos >> 16) & 0xF, /* channel # */
+			    *ppos & 0xFFFF,	 /* addr */
+			    &value);
+	mutex_unlock(&bd->mutex);
+
+	if (rc)
+		return rc;
+
+	/* If the read was successful, copy the data to userspace */
+	if (copy_to_user(user_buffer, &value, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * write() on the register-access device: write one 8-byte rshim
+ * register.  The offset encoding matches rshim_rshim_read().
+ */
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	u64 value;
+	int rc;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(&value, user_buffer, count))
+		return -EFAULT;
+
+	mutex_lock(&bd->mutex);
+	rc = bd->write_rshim(bd,
+			     (*ppos >> 16) & 0xF, /* channel # */
+			     *ppos & 0xFFFF, /* addr */
+			     value);
+	mutex_unlock(&bd->mutex);
+
+	return rc ? rc : count;
+}
+
+/*
+ * release() on the register-access device: drop the module and kref
+ * references taken in rshim_open().
+ */
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations for the register-access device minor. */
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+/* Open the register-access device: just install its file operations. */
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+/* seq_file show: print boot mode, reset flag and backend driver name. */
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	u64 value;
+	int rc;
+
+	/* Boot mode. */
+	rc = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL, &value);
+	if (rc) {
+		pr_err("couldn't read rshim register\n");
+		return rc;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+/*
+ * Handle writes to the misc device.  Accepts "BOOT_MODE <hex>" to set
+ * the boot mode and "SW_RESET <hex>" (non-zero value) to reset the
+ * chip, detaching and re-attaching the TmFifo when the backend has no
+ * reprobe support.  Returns @count on success or a negative errno.
+ */
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+	/* copy_from_user() doesn't NUL-terminate; do it before parsing. */
+	buf[count] = '\0';
+
+	/* Bound the %s width so a long token can't overflow key[]. */
+	if (sscanf(buf, "%31s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+/*
+ * release() on the misc device: tear down the seq_file and drop the
+ * module/kref references taken in rshim_open().
+ */
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* File operations for the misc device minor (seq_file backed). */
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+/*
+ * Open the misc device: convert the file into a seq_file showing
+ * rshim_misc_seq_show(), stashing the backend pointer in the seq_file's
+ * private field.
+ */
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+/*
+ * Common open(): resolve the backend from the minor number, take module
+ * and kref references, then dispatch to the per-device open routine
+ * which installs the real file operations.  Both references are dropped
+ * again if the per-device open fails.
+ */
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	/* Minors are grouped RSH_DEV_TYPES per backend instance. */
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+/* Common fops: the per-minor ops are installed inside rshim_open(). */
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+/*
+ * Push zero-length VIRTIO_ID_NET sync messages into the host-to-tile
+ * FIFO, one per currently-free entry (capped at TMFIFO_MAX_SYNC_WORDS).
+ * Used to re-sync the FIFO when the backend has no reprobe support.
+ * Returns 0, or a negative error from the initial register reads.
+ */
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	/*
+	 * Zero the whole header word first; type/len only overlay part
+	 * of the union, and the rest would otherwise be uninitialized
+	 * stack data written to the hardware.
+	 */
+	hdr.data = 0;
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		/*
+		 * NOTE(review): the sync word is written to the STS
+		 * register; the DATA register would seem the natural
+		 * target for FIFO payload -- confirm against the
+		 * register specification.
+		 */
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_STS, hdr.data);
+		if (retval < 0)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * rshim_notify() - dispatch backend events.
+ * @bd: backend instance.
+ * @event: RSH_EVENT_* code.
+ * @code: event argument (the error code for RSH_EVENT_FIFO_ERR).
+ *
+ * ATTACH re-syncs the fifo (when no reprobe support), invokes every
+ * registered service's create hook, then drains pending input; DETACH
+ * invokes the delete hooks under a temporary ref count so a service
+ * can't be torn down mid-call.  Returns 0 or the first service
+ * creation error.
+ */
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+/*
+ * Map a device name to a slot index.  Preference order: a slot that
+ * previously held this name, then a never-used slot, then any slot
+ * with no live device.  Returns -1 if no slot is available.
+ */
+static int rshim_find_index(char *dev_name)
+{
+	int i;
+
+	/* First look for a match with a previous device name. */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_dev_names[i] &&
+		    !strcmp(dev_name, rshim_dev_names[i])) {
+			pr_debug("found match with previous at index %d\n", i);
+			return i;
+		}
+	}
+
+	/* Then look for a never-used slot. */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (!rshim_dev_names[i]) {
+			pr_debug("found never-used slot %d\n", i);
+			return i;
+		}
+	}
+
+	/* Finally look for a currently-unused slot. */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (!rshim_devs[i]) {
+			pr_debug("found unused slot %d\n", i);
+			return i;
+		}
+	}
+
+	return -1;
+}
+
+/* Look up the backend registered under @dev_name, or NULL if none. */
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int index = rshim_find_index(dev_name);
+
+	if (index >= 0)
+		return rshim_devs[index];
+
+	/* If none of that worked, we fail. */
+	pr_err("couldn't find slot for new device %s\n", dev_name);
+	return NULL;
+}
+EXPORT_SYMBOL(rshim_find);
+
+/*
+ * House-keeping timer.  Kicks the work handler when console work is
+ * pending, and periodically (every rshim_keepalive_period ms) requests
+ * a keepalive scratchpad write.  Re-arms itself each tick.
+ */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+/* sysfs show: report the backend device name for this rshim instance. */
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	return bd ? snprintf(buf, PAGE_SIZE, "%s\n",
+			     rshim_dev_names[bd->dev_index])
+		  : -ENODEV;
+}
+
+/* Read-only sysfs attribute exposing the backend device name. */
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+/* Deferred work: request the network service module after attach. */
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/*
+ * Check whether this backend may attach: clear RSH_SCRATCHPAD1 and
+ * poll it for up to one second.  If another live backend rewrites the
+ * keepalive magic in that window, return -EEXIST; register I/O errors
+ * return -ENODEV; otherwise 0.
+ */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	u64 value;
+	int attempt, rc;
+
+	/* Write value 0 to RSH_SCRATCHPAD1. */
+	rc = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (rc < 0)
+		return -ENODEV;
+
+	for (attempt = 0; attempt < 10; attempt++) {
+		rc = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				    &value);
+		if (rc < 0)
+			return -ENODEV;
+
+		if (value == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+/*
+ * rshim_register() - register a backend and create its device nodes.
+ * Validates the backend callbacks, checks that no other backend owns
+ * the target, initializes all synchronization state, creates the
+ * per-type char/class devices and boot buffers, and starts the
+ * keepalive timer.  Returns 0 on success or a negative errno.
+ */
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	bd->dev_index = dev_index;
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+			/*
+			 * cl_dev is an ERR_PTR here; don't dereference
+			 * it below -- skip drvdata/sysfs setup.
+			 */
+			continue;
+		}
+		pr_debug("added class dev %s\n",
+			 format_dev_t(devbuf, rshim_dev_base +
+				      bd->dev_index *
+				      RSH_DEV_TYPES + i));
+
+		dev_set_drvdata(cl_dev, bd);
+		if (device_create_file(cl_dev, &dev_attr_rshim_path))
+			pr_err("could not create rshim_path file in sysfs\n");
+	}
+
+	/* Boot buffers; allocation failure leaves both NULL (tolerated). */
+	for (i = 0; i < 2; i++) {
+		bd->boot_buf[i] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+		if (!bd->boot_buf[i]) {
+			if (i == 1) {
+				kfree(bd->boot_buf[0]);
+				bd->boot_buf[0] = NULL;
+			}
+		}
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	/* Load service modules (e.g. rshim_net) shortly after attach. */
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+
+/*
+ * rshim_deregister() - undo rshim_register().
+ *
+ * Stops the keepalive timer, frees the boot buffers, and tears down the
+ * character devices and class devices that were created at registration
+ * time.  A no-op if the backend was never registered.
+ */
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the timer. */
+	del_timer_sync(&bd->timer);
+
+	/* Free the boot buffers allocated in rshim_register(). */
+	for (i = 0; i < 2; i++)
+		kfree(bd->boot_buf[i]);
+
+	/* Remove the per-type char devices and their class devices. */
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class,
+			       rshim_dev_base + bd->dev_index *
+			       RSH_DEV_TYPES + i);
+	}
+
+	/* Release the device slot; the name string is kept for reuse. */
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+/*
+ * rshim_register_service() - register a global rshim service (e.g. the
+ * network service) and attach it to every existing backend.
+ *
+ * A private copy of @service is published through the RCU-protected
+ * rshim_svc[] array.
+ *
+ * Return: 0 on success, -EEXIST if a service of this type is already
+ * registered, -ENOMEM or the service create() error otherwise.
+ */
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add memory barrier to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_unlock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+/*
+ * rshim_deregister_service() - detach a service from all backends and
+ * release the private copy made by rshim_register_service().
+ *
+ * Combines synchronize_rcu() with a reference-count drain so the copy
+ * cannot be freed while a reader still holds it.
+ */
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		/* Wait for in-flight RCU readers to finish. */
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+/*
+ * Module initialization: create the "rsh" device class, reserve the
+ * char-device region, allocate the per-device bookkeeping arrays, and
+ * create the driver work queue.
+ *
+ * Return: 0 on success, negative errno on failure; everything acquired
+ * so far is released on the error path.
+ */
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	/*
+	 * Use kcalloc() for both arrays so the element-count
+	 * multiplication is overflow-checked and the two allocations
+	 * are consistent.
+	 */
+	rshim_dev_names = kcalloc(rshim_nr_devs, sizeof(rshim_dev_names[0]),
+				  GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	/* rshim_dev_base is still 0 if the region was never allocated. */
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+/*
+ * Module cleanup: release everything acquired in rshim_init(), plus the
+ * remaining service copies and device-name strings.
+ */
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	/* Let any pending deferred module loading finish first. */
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ *
+ * NOTE(review): rshim.c tests this with "#if RSH_RESET_MUTEX" while the
+ * struct below uses "#ifdef"; defining it as 0 would therefore declare
+ * the reset_complete field without ever initializing it -- the two
+ * guards should use the same form.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types; one character device of each type per backend. */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES		/* number of sub-devices per backend */
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX
+};
+
+/* TMFIFO message header; also accessible as one raw 64-bit word. */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length (big-endian) */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/*
+ * FIFO structure used for the per-channel read/write FIFOs of a backend.
+ *
+ * NOTE(review): head/tail appear to be free-running indices masked by
+ * the power-of-2 FIFO size (see the size comment above) -- confirm
+ * against the FIFO helpers in rshim.c.
+ */
+struct rshim_fifo {
+	unsigned char *data;		/* FIFO backing buffer */
+	unsigned int head;
+	unsigned int tail;
+	wait_queue_head_t operable;	/* woken when the FIFO state changes */
+};
+
+/*
+ * RShim backend.
+ *
+ * One instance per attached rshim target; filled in by a transport
+ * backend and registered via rshim_register().
+ */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Pointer to the net device. */
+	void *net;
+
+	/* House-keeping Timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 */
+	struct kref kref;
+
+	/* State flags. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+/*
+ * Use "#if" (not "#ifdef") to match the guard around the corresponding
+ * init_completion() in rshim.c; otherwise defining RSH_RESET_MUTEX as 0
+ * would declare a field that is never initialized.
+ */
+#if RSH_RESET_MUTEX
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/*
+ * RShim service.  A copy of this structure is published via the
+ * RCU-protected rshim_svc[] array by rshim_register_service().
+ */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/*
+	 * Reference number.  Drained to zero by
+	 * rshim_deregister_service() before the copy is freed.
+	 */
+	atomic_t ref;
+
+	/* Create service. */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global work queue shared by the rshim drivers. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. May be called in interrupt context. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..b14df716
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#define _64bit(x) x ## ULL
+#endif /* __ASSEMBLER */
+
+#include <linux/types.h>
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__RSHIM_REGS_H__) */
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds networking support over the Rshim interface of
the BlueField SoC. It communicates with the target (ARM) side via
the Rshim TmFifo.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |   2 +-
 drivers/soc/mellanox/host/rshim_net.c | 834 ++++++++++++++++++++++++++++++++++
 2 files changed, 835 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index e47842f..1a282b9 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o
+obj-m := rshim.o rshim_net.o
 
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/*
+ * Flag to drain the current pending packet.
+ * NOTE(review): not referenced in this portion of the file -- verify it
+ * is still used elsewhere.
+ */
+static bool rshim_net_draining_mode;
+
+/* Spin lock serializing calls into vring_interrupt(). */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+/*
+ * Return-type shims for virtio callbacks -- presumably to cope with
+ * kernel-version differences; the use sites are outside this chunk.
+ */
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the ring state. */
+struct rshim_net_vring {
+	void *va;			/* virtual address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types (bit numbers in rshim_net.pend_events). */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;		/* protects config/activity updates */
+	struct rshim_backend *bd;		/* backend */
+	u8 status;
+	u16 virtio_registered : 1;	/* virtio device registered */
+	u64 features;
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/*
+ * Allocate the Rx/Tx ring memory for the net device.
+ *
+ * Return: 0 on success, -ENOMEM if an allocation fails (the original
+ * returned -EINVAL, which misreports an out-of-memory condition).
+ * Partially allocated rings are left in place; the caller is expected
+ * to release them via rshim_net_free_vrings() -- verify at the caller.
+ */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		/* Page-aligned allocation sized for the whole ring. */
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the net device.
+ *
+ * Delete each virtqueue before releasing the ring memory it was built
+ * on.  (The original freed the memory first and skipped the virtqueue
+ * deletion entirely when 'va' was already NULL; it also computed an
+ * unused 'size'.)
+ */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Tear down the virtqueue first; it references the ring. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		/* kfree(NULL) is a no-op, so no guard is needed. */
+		kfree(vring->va);
+		vring->va = NULL;
+	}
+}
+
+/*
+ * Work handler for Rx, Tx or activity monitoring.  Scheduled from the
+ * house-keeping timer (rshim_net_timer); other scheduling sites are
+ * outside this portion of the file.
+ */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+
+	/* Tx. */
+	if (test_and_clear_bit(RSH_NET_TX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_TX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, false);
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(RSH_NET_RX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_RX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, true);
+	}
+
+	/* Keepalive check: declare the link down after Rx inactivity. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/*
+ * Release callback for the virtio device.  The ring memory is managed
+ * by this driver itself (rshim_net_alloc_vrings/rshim_net_free_vrings),
+ * so there is nothing to release here.
+ */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/*
+ * Get the next packet descriptor from the vring, or NULL when the
+ * private cursor (next_avail) has caught up with the avail ring index.
+ * The head index read from the ring is bounds-checked before use.
+ */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	/* No new entries in the avail ring. */
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/*
+ * Get the total length of a descriptor chain by following the
+ * VRING_DESC_F_NEXT links and summing each descriptor's length.
+ */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		/* Stop at the last descriptor of the chain. */
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically (every rshim_net_timer_interval jiffies) kicks the
+ * worker with both Rx and Tx events set, then re-arms itself.
+ */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	/* Self re-arming periodic timer. */
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+/*
+ * Return the current descriptor chain to the used ring and notify the
+ * virtio core.
+ *
+ * NOTE(review): the used ring 'id' field is written without a
+ * cpu_to_virtio32() conversion while 'len' gets one — presumably both
+ * sides are little-endian so it works; verify for consistency.
+ */
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	idx = vr->used->idx % vring->size;
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	/* No packet is in flight on this ring any more. */
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/*
+ * Update the link activity.
+ *
+ * @activity true brings the virtio-net link up, false brings it down
+ * and resets the per-ring packet state.  A config-changed event is
+ * raised only on an actual state transition.  Caller holds net->lock.
+ */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	if (activity) {
+		/* Bring up the link. */
+		if (!(net->config.status & VIRTIO_NET_S_LINK_UP)) {
+			net->config.status |= VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+		}
+	} else {
+		/* Bring down the link. */
+		if (net->config.status & VIRTIO_NET_S_LINK_UP) {
+			int i;
+
+			net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+
+			/* Reset the ring state. */
+			for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+				net->vrings[i].pkt_len =
+						sizeof(struct virtio_net_hdr);
+				net->vrings[i].cur_len = 0;
+				net->vrings[i].rem_len = 0;
+			}
+		}
+	}
+}
+
+/*
+ * Rx & Tx processing of a virtual queue.
+ *
+ * Pumps data between the TmFifo backend and one vring under net->lock,
+ * restartable at any point: the in-progress descriptor and the
+ * remaining byte counts are kept in the rshim_net_vring so the worker
+ * can resume when the FIFO would block (rshim_fifo_read/write return
+ * <= 0).  Each packet on the wire is a TmFifo header followed by the
+ * payload; on the vring side a packet is a virtio_net_hdr followed by
+ * the frame.
+ */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet: transfer the TmFifo header first. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header (may be partial). */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/*
+				 * Initialize the packet header.  Assumes
+				 * desc->addr is a direct-map physical
+				 * address — TODO confirm for this backend.
+				 */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			/* virtio_net_hdr already consumed above. */
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Move the payload bytes; <= 0 means the FIFO would block. */
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc so the worker can resume here. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+
+	/*
+	 * Virtio-net maintains vrings in pairs.  Even number ring for Rx
+	 * and odd number ring for Tx (RSH_NET_VRING_RX == 0,
+	 * RSH_NET_VRING_TX == 1); the original comment had Rx/Tx swapped.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX bit. */
+		if (!test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	} else {
+		/* Set the TX bit. */
+		if (!test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	}
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Get the array of feature bits for this device. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	/* Resolve the enclosing rshim_net and hand back its feature mask. */
+	return container_of(vdev, struct rshim_net, vdev)->features;
+}
+
+/* Confirm device features to use. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *rnet = container_of(vdev, struct rshim_net, vdev);
+
+	/* Record the feature set negotiated by the virtio core. */
+	rnet->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Any packet still in flight on a ring is completed back to the used
+ * ring first, then the virtqueue itself is destroyed.  The vring
+ * backing memory is kept (owned by rshim_net_free_vrings()).
+ */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		/* Clear the pointer before deleting the virtqueue. */
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested ring on top of the preallocated
+ * vring memory (see rshim_net_alloc_vrings()).  @ctx and @desc are
+ * accepted for API compatibility but unused.  On failure all queues
+ * created so far are torn down and a negative errno is returned.
+ */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct rshim_net, vdev)->status;
+}
+
+/* Write the status byte. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = status;
+}
+
+/* Reset the device.  Only the status byte is cleared for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ *
+ * Copies @len bytes starting at @offset of the virtio-net config space
+ * into @buf.  Out-of-range requests (including integer-overflowing
+ * offset+len) are rejected with an error log.
+ */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	/* The second test catches unsigned wrap-around of offset + len. */
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&net->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ *
+ * Copies @len bytes from @buf into the virtio-net config space at
+ * @offset.  Out-of-range requests (including integer-overflowing
+ * offset+len) are rejected with an error log.
+ */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	/* The second test catches unsigned wrap-around of offset + len. */
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		/* Was "virtio_get" — copy-paste error in the log message. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/* Virtio config operations wired into the virtio core at register time. */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/*
+ * Tear down one rshim_net device: stop the timer and worker, unregister
+ * from virtio if registered, free the vring memory, free the device.
+ * Safe to call with a partially-initialized @net (error path of
+ * rshim_net_create()) or NULL.  Always returns 0.
+ */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (net) {
+		/* Stop the timer. */
+		del_timer_sync(&net->timer);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&net->work);
+
+		/* Unregister virtio. */
+		if (net->virtio_registered)
+			unregister_virtio_device(&net->vdev);
+
+		/* Free vring. */
+		rshim_net_free_vrings(net);
+
+		kfree(net);
+	}
+
+	return 0;
+}
+
+/*
+ * Rx ready callback from the rshim backend: data is available in the
+ * TmFifo, so kick the worker with the Rx event set.
+ */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (net) {
+		test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+		schedule_work(&net->work);
+	}
+}
+
+/* Remove the net device attached to this backend, if any. */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	int rc;
+
+	if (!bd->net)
+		return 0;
+
+	rc = rshim_net_delete_dev((struct rshim_net *)bd->net);
+	bd->net = NULL;
+
+	return rc;
+}
+
+/*
+ * Create and register the virtio-net device for a backend.
+ *
+ * Allocates the rshim_net state, sets up the worker, timer, MAC and
+ * virtio config, registers with the virtio core, then brings the link
+ * up.  Returns 0 on success, -EEXIST if already created, or a negative
+ * errno on failure (all partial state freed via rshim_net_delete_dev()).
+ */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(struct rshim_net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	/*
+	 * timer_setup() already installs rshim_net_timer as the callback;
+	 * the old extra "net->timer.function = rshim_net_timer;" assignment
+	 * was redundant and has been dropped.
+	 */
+	timer_setup(&net->timer, rshim_net_timer, 0);
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+
+	/* Propagate the allocator's errno instead of discarding it. */
+	ret = rshim_net_alloc_vrings(net);
+	if (ret)
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+/* Service descriptor registered with the rshim core at module init. */
+struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+/* Module init: register the net service with the rshim core. */
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+/* Module exit: drain in-flight packets, then deregister the service. */
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	/*
+	 * Plain call instead of "return <expr>;" — a return with an
+	 * expression is a constraint violation in a void function if the
+	 * callee returns a value.
+	 */
+	rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds networking support over the Rshim interface of
the BlueField SoC. It communicates with the target (ARM) side via
the Rshim TmFifo.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |   2 +-
 drivers/soc/mellanox/host/rshim_net.c | 834 ++++++++++++++++++++++++++++++++++
 2 files changed, 835 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index e47842f..1a282b9 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o
+obj-m := rshim.o rshim_net.o
 
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/* Flag to drain the current pending packet. */
+static bool rshim_net_draining_mode;
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the ring state. */
+struct rshim_net_vring {
+	void *va;			/* virtual address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types. */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;
+	struct rshim_backend *bd;		/* backend */
+	u8 status;
+	u16 virtio_registered : 1;
+	u64 features;
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/*
+ * Allocate vring backing memory for the net device.
+ *
+ * Initializes each ring's geometry and allocates a zeroed, page-aligned
+ * buffer for it.  Returns 0 on success or -ENOMEM on allocation
+ * failure (previously -EINVAL, which misreported the cause); partially
+ * allocated rings are released by the caller via rshim_net_delete_dev().
+ */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the net device.
+ *
+ * The virtqueue is deleted BEFORE its backing ring memory is freed;
+ * the previous code kfree()'d 'va' first and only deleted the vq when
+ * 'va' happened to be non-NULL, leaving a window where the virtqueue
+ * still referenced freed ring memory and a vq with no 'va' leaked.
+ */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Tear down the virtqueue first. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		/* kfree(NULL) is a no-op, so no guard is needed. */
+		kfree(vring->va);
+		vring->va = NULL;
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+
+	/* Tx. */
+	if (test_and_clear_bit(RSH_NET_TX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_TX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, false);
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(RSH_NET_RX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_RX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, true);
+	}
+
+	/* Keepalive check. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/* Nothing to do for now. */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/* House-keeping timer. */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	idx = vr->used->idx % vring->size;
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/* Update the link activity. */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	if (activity) {
+		/* Bring up the link. */
+		if (!(net->config.status & VIRTIO_NET_S_LINK_UP)) {
+			net->config.status |= VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+		}
+	} else {
+		/* Bring down the link. */
+		if (net->config.status & VIRTIO_NET_S_LINK_UP) {
+			int i;
+
+			net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+
+			/* Reset the ring state. */
+			for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+				net->vrings[i].pkt_len =
+						sizeof(struct virtio_net_hdr);
+				net->vrings[i].cur_len = 0;
+				net->vrings[i].rem_len = 0;
+			}
+		}
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header. */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+
+	/*
+	 * Virtio-net maintains vrings in pairs. Odd number ring for Rx
+	 * and even number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX bit. */
+		if (!test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	} else {
+		/* Set the TX bit. */
+		if (!test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	}
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Get the array of feature bits for this device. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->features;
+}
+
+/* Confirm device features to use. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds at most ARRAY_SIZE(net->vrings) queues on top of the
+ * preallocated vring memory, publishing each queue pointer only after a
+ * full barrier.  On any failure, all queues built so far are torn down.
+ * Returns 0 on success or a negative error code.
+ */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	/* Never create more queues than we have preallocated vrings for. */
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	/* Unwind all queues created so far (also releases pending descs). */
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Report the cached virtio status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	return container_of(vdev, struct rshim_net, vdev)->status;
+}
+
+/* Cache the virtio status byte written by the core. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct rshim_net *rnet = container_of(vdev, struct rshim_net, vdev);
+
+	rnet->status = status;
+}
+
+/* Reset the device: just clear the cached status byte for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	container_of(vdev, struct rshim_net, vdev)->status = 0;
+}
+
+/* Copy 'len' bytes of the device config space at 'offset' into 'buf'. */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+	u8 *cfg = (u8 *)&net->config;
+
+	/* Reject requests past the end of config space or that wrap. */
+	if (offset > sizeof(net->config) ||
+	    len > sizeof(net->config) - offset) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, cfg + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ *
+ * Copies 'len' bytes from 'buf' into the config space at 'offset' after
+ * bounds/overflow checking.
+ */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		/* Was "virtio_get" - fixed copy-paste in the error message. */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/* Virtio config operations (dispatch table used by the virtio core). */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/* Tear down a network device instance and free all of its resources. */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (net == NULL)
+		return 0;
+
+	/* Stop the timer. */
+	del_timer_sync(&net->timer);
+
+	/* Cancel the pending work. */
+	cancel_work_sync(&net->work);
+
+	/* Unregister virtio if it was registered. */
+	if (net->virtio_registered)
+		unregister_virtio_device(&net->vdev);
+
+	/* Free the vrings. */
+	rshim_net_free_vrings(net);
+
+	kfree(net);
+
+	return 0;
+}
+
+/* Rx-ready callback from the rshim backend: flag Rx and kick the worker. */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (net == NULL)
+		return;
+
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+	schedule_work(&net->work);
+}
+
+/* Detach and destroy the network service instance of this backend. */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+	int ret = 0;
+
+	if (net != NULL) {
+		ret = rshim_net_delete_dev(net);
+		bd->net = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Create and register the virtio network device for a backend.
+ *
+ * Returns 0 on success, -EEXIST if the backend already has a network
+ * device, or a negative error code on allocation/registration failure.
+ */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(struct rshim_net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	/*
+	 * timer_setup() already installs rshim_net_timer as the callback;
+	 * the redundant direct assignment to net->timer.function is gone.
+	 */
+	timer_setup(&net->timer, rshim_net_timer, 0);
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+	if (rshim_net_alloc_vrings(net))
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+/* Service descriptor registered with the rshim core at module init. */
+struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+/* Module init: register the network service with the rshim core. */
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+/* Module exit: drain pending packets, then deregister the service. */
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	/* Plain call: returning an expression from a void function is invalid. */
+	rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the USB backend driver to access the Rshim
interface on the BlueField SoC. It can be used when a USB cable
is connected to the Smart NIC or standalone device.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |    2 +-
 drivers/soc/mellanox/host/rshim_usb.c | 1035 +++++++++++++++++++++++++++++++++
 2 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index 1a282b9..c6703cd 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o
+obj-m := rshim.o rshim_net.o rshim_usb.o
 
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Disable RShim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs. */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/* Structure to hold all of our device specific stuff. */
+struct rshim_usb {
+	/* RShim backend structure (embedded; container_of() maps back). */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/* Control data buffer for the 8-byte register control transfers. */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode. */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/* Table of devices that work with this driver (const: never modified). */
+static const struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/* Arguments to an urb completion handler. */
+#define URB_COMP_ARGS struct urb *urb
+
+/* Final kref release: deregister the backend and free the device. */
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd = container_of(kref, struct rshim_backend,
+						kref);
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	rshim_deregister(bd);
+	kfree(dev);
+}
+
+/* Rshim read/write routines */
+
+/*
+ * Read one 64-bit rshim register over a blocking USB control transfer.
+ *
+ * @bd:     the backend
+ * @chan:   register channel (sent as wValue)
+ * @addr:   register address (sent as wIndex)
+ * @result: set to the register value on success; left untouched on error
+ *          (previously it was overwritten with partial/garbage data).
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8) {
+		/*
+		 * The RShim HW puts bytes on the wire in little-endian order
+		 * regardless of endianness settings either in the host or the
+		 * ARM cores.
+		 */
+		*result = le64_to_cpu(dev->ctrl_data);
+		return 0;
+	}
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Write one 64-bit rshim register over a blocking USB control transfer. */
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int rc;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	rc = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+			     0,  /* request */
+			     USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+			     USB_DIR_OUT,  /* request type */
+			     chan, /* value */
+			     addr, /* index */
+			     &dev->ctrl_data, 8, 2000);
+
+	if (rc == 8)
+		return 0;
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return rc > 8 ? -EBADE : -EBADR;
+}
+
+/* Boot routines */
+
+/* Completion handler for the boot-stream write urb. */
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT) {
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	} else if (urb->status) {
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+	}
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+/*
+ * Write a chunk of the boot stream to the boot FIFO endpoint.
+ *
+ * Called with bd->mutex held; the mutex is dropped while waiting for the
+ * urb to complete so the user can interrupt a stuck transfer (see the
+ * long comment below).  Returns the number of bytes written, or a
+ * negative error code if nothing was written.
+ */
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause stuck.
+	 * Here we release the mutex so user could use 'ctrl + c' to terminate
+	 * the current write. Once the boot file is opened again, the
+	 * outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		/* Interrupted by a signal: cancel and report partial count. */
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+/*
+ * Completion handler shared by the FIFO bulk-read urb and the interrupt
+ * urb (distinguished via dev->read_urb_is_intr).  Depending on status it
+ * consumes the data, retries transient errors, or reports a FIFO error
+ * to the upper layers.
+ */
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Start a FIFO transfer: a bulk read of FIFO data when bytes are known
+ * to be available, otherwise an interrupt-endpoint poll to learn how
+ * many bytes are pending.  Debug messages now carry the correct
+ * "fifo_read:" label (they were copy-pasted from fifo_drain/callback)
+ * and say "submitted" since this is the initial submission.
+ */
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read: failed submitting read "
+			      "urb, error %d", retval);
+		}
+		pr_debug("fifo_read: submitted read urb\n");
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read: failed submitting "
+			      "interrupt urb, error %d", retval);
+		}
+		pr_debug("fifo_read: submitted interrupt urb\n");
+	}
+}
+
+/*
+ * Completion handler for the FIFO bulk-out urb.  On success it wakes any
+ * waiting writers; transient errors are retried up to WRITE_RETRIES
+ * times; anything else is reported upward as a FIFO error.
+ */
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * Submit a bulk-out urb carrying 'count' bytes of FIFO data.  Called with
+ * the backend spinlock held.  Returns 0 on successful submission, -1 on
+ * failure; the final transfer status is reported via the completion
+ * callback above.
+ */
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	/* The FIFO protocol moves data in whole 8-byte words. */
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/* These make the endpoint test code in rshim_usb_probe() a lot cleaner. */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+#define max_pkt(ep)    le16_to_cpu(ep->wMaxPacketSize)
+#define ep_addr(ep)    (ep->bEndpointAddress)
+
+/* Backend read hook: kick off a FIFO read for net/console streams. */
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	if (devtype == RSH_DEV_TYPE_NET || devtype == RSH_DEV_TYPE_CONSOLE) {
+		rshim_usb_fifo_read(dev, buf, count);
+		return 0;
+	}
+
+	pr_err("bad devtype %d\n", devtype);
+	return -EINVAL;
+}
+
+/* Backend write hook: dispatch to the FIFO or boot-stream writer. */
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	ssize_t ret;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		ret = rshim_usb_fifo_write(dev, buf, count);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		ret = rshim_usb_boot_write(dev, buf, count);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+/* Backend cancel hook: kill the outstanding urb for the given stream. */
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		usb_kill_urb(is_write ? dev->write_urb :
+				       dev->read_or_intr_urb);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		usb_kill_urb(dev->boot_urb);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		break;
+	}
+}
+
+/*
+ * Probe one USB interface of the BlueField rshim device.
+ *
+ * The device exposes two vendor-specific interfaces: subclass 0 carries
+ * the boot FIFO (one bulk-out endpoint) and subclass 1 carries the
+ * tile-monitor FIFO (bulk-in, bulk-out, and interrupt-in endpoints).
+ * Both probes share one rshim_usb structure, found or created via the
+ * backend registry keyed by the USB path name.
+ */
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		/* Backend keeps its existing dev_name; ours is redundant. */
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		/* Ownership of usb_dev_name passes to the backend here. */
+		bd = &dev->bd;
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	allocfail |= bd->read_buf == 0;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= dev->intr_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->read_or_intr_urb == 0;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->write_urb == 0;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	if (dev) {
+		/* Release urbs and coherent buffers, then drop our kref. */
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	kfree(usb_dev_name);
+	return retval;
+}
+
+/*
+ * rshim_usb_disconnect() - USB disconnect handler, invoked once per
+ * interface (rshim and tmfifo).  Notifies the backend of the detach,
+ * tears down per-interface USB state, and drops the usb_device and
+ * kref references once both interfaces have disconnected.
+ */
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;
+
+	/*
+	 * NOTE(review): assumes intfdata was set in probe; a NULL here
+	 * would oops below -- confirm probe cannot leave intfdata unset.
+	 */
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		/* Kill before free: ensure no completion can still fire. */
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	/* Drop the usb_device ref only when both interfaces are gone. */
+	if (!bd->has_rshim && !bd->has_tm) {
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+/* USB driver glue: probe/disconnect callbacks plus the device ID table. */
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+/* Module init: register with the USB subsystem; returns usb_register()'s
+ * result (0 on success).
+ */
+static int __init rshim_usb_init(void)
+{
+	int result;
+
+	/* Register this driver with the USB subsystem. */
+	result = usb_register(&rshim_usb_driver);
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module teardown: undo rshim_usb_init(). */
+static void __exit rshim_usb_exit(void)
+{
+	/* Deregister this driver with the USB subsystem. */
+	usb_deregister(&rshim_usb_driver);
+}
+
+/* Module entry/exit points and metadata. */
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the USB backend driver to access the Rshim
interface on the BlueField SoC. It can be used when a USB cable
is connected to the SmartNIC or to a standalone device.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |    2 +-
 drivers/soc/mellanox/host/rshim_usb.c | 1035 +++++++++++++++++++++++++++++++++
 2 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index 1a282b9..c6703cd 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o
+obj-m := rshim.o rshim_net.o rshim_usb.o
 
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Disable RShim access (module parameter kept for compatibility). */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs.  BlueField reuses the legacy Tilera
+ * vendor ID.
+ */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/* Structure to hold all of our device specific stuff. */
+struct rshim_usb {
+	/* RShim backend structure.  Must be first (container_of is used). */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/*
+	 * Control data.  Also serves as the transfer buffer for the
+	 * 8-byte vendor control messages in rshim_usb_read_rshim() and
+	 * rshim_usb_write_rshim(); holds little-endian wire order there.
+	 */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode. */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/* Table of devices that work with this driver (vendor/product match). */
+static struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/*
+ * Arguments to an urb completion handler -- wrapped in a macro,
+ * presumably because the signature varied across kernel versions.
+ */
+#define URB_COMP_ARGS struct urb *urb
+
+/*
+ * rshim_usb_delete() - kref release function: deregisters the backend
+ * and frees the containing rshim_usb.  The kref_put() call sites in this
+ * file drop the reference while holding the rshim lock.
+ */
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_usb *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_usb, bd);
+
+	rshim_deregister(bd);
+	kfree(dev);
+}
+
+/* Rshim read/write routines */
+
+/*
+ * rshim_usb_read_rshim() - blocking 8-byte rshim register read via a
+ * vendor control transfer (2 s timeout).  Returns 0 on success, -EBADE
+ * for an over-long transfer, -EBADR for a short one, or the negative
+ * USB error.  Note: *result is updated from ctrl_data even when the
+ * transfer failed.
+ */
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read and endian conversion. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	/*
+	 * The RShim HW puts bytes on the wire in little-endian order
+	 * regardless of endianness settings either in the host or the ARM
+	 * cores.
+	 */
+	*result = le64_to_cpu(dev->ctrl_data);
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/*
+ * rshim_usb_write_rshim() - blocking 8-byte rshim register write via a
+ * vendor control transfer (2 s timeout).  Returns 0 on success, -EBADE
+ * for an over-long transfer, -EBADR for a short one, or the negative
+ * USB error.
+ */
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_OUT,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Boot routines */
+
+/*
+ * Completion handler for the boot-stream urb; logs cancel/failure and
+ * wakes the waiter in rshim_usb_boot_write() via boot_write_complete.
+ */
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT)
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	else if (urb->status)
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+/*
+ * rshim_usb_boot_write() - synchronous write of "count" bytes to the
+ * boot FIFO endpoint.  Called with bd->mutex held; the mutex is dropped
+ * while waiting so the user can interrupt a stuck transfer.  Returns the
+ * number of bytes actually transferred, or a negative error if nothing
+ * was transferred.
+ */
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause stuck.
+	 * Here we release the mutex so user could use 'ctrl + c' to terminate
+	 * the current write. Once the boot file is opened again, the
+	 * outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		/* Interrupted by a signal: cancel and report partial count. */
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+/*
+ * rshim_usb_fifo_read_callback() - completion handler shared by the
+ * bulk-in read urb and the interrupt urb (dev->read_urb_is_intr tells
+ * them apart).  Retries transient errors (up to READ_RETRIES, with
+ * GFP_ATOMIC resubmission) and reports everything else upward via
+ * rshim_notify().
+ */
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * rshim_usb_fifo_read() - start asynchronous FIFO input.  If bytes are
+ * already known to be available (*intr_buf) or still buffered
+ * (read_buf_bytes), submit a bulk read into "buffer"; otherwise arm the
+ * interrupt urb to wait for a "bytes available" notification.  Both
+ * paths complete via rshim_usb_fifo_read_callback().
+ */
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		/* Buffer is coherent-allocated; skip the DMA mapping step. */
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_drain: failed submitting read "
+			      "urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted read urb\n");
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read_callback: failed submitting "
+			      "interrupt urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted interrupt urb\n");
+	}
+}
+
+/*
+ * rshim_usb_fifo_write_callback() - completion handler for the bulk-out
+ * write urb.  Wakes writers on success, retries transient errors (up to
+ * WRITE_RETRIES), and reports everything else via rshim_notify().
+ */
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+/*
+ * rshim_usb_fifo_write() - submit an asynchronous bulk-out write of
+ * "count" bytes from the coherent write buffer.  Returns 0 on successful
+ * submission.
+ * NOTE(review): the failure path returns -1 rather than the
+ * usb_submit_urb() errno; consider propagating retval.
+ */
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	/* Buffer is coherent-allocated; skip the DMA mapping step. */
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/* These make the endpoint test code in rshim_usb_probe() a lot cleaner. */
+/*
+ * NOTE(review): the USB core's usb_endpoint_is_bulk_in() family could
+ * replace these hand-rolled checks -- confirm minimum kernel version.
+ */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+#define max_pkt(ep)    le16_to_cpu(ep->wMaxPacketSize)
+#define ep_addr(ep)    (ep->bEndpointAddress)
+
+/*
+ * rshim_usb_backend_read() - backend "read" hook.  For net/console this
+ * only kicks off an asynchronous FIFO read (data arrives via the urb
+ * callback), hence the unconditional 0 return; other devtypes are
+ * rejected with -EINVAL.
+ */
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		rshim_usb_fifo_read(dev, buf, count);
+		return 0;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * rshim_usb_backend_write() - backend "write" hook.  Net/console writes
+ * submit an async urb (0 on success, -1 on submit failure); boot writes
+ * are synchronous and return a byte count or negative error.
+ */
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		return rshim_usb_fifo_write(dev, buf, count);
+
+	case RSH_DEV_TYPE_BOOT:
+		return rshim_usb_boot_write(dev, buf, count);
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/*
+ * rshim_usb_backend_cancel_req() - cancel the outstanding urb for the
+ * given device type and direction via usb_kill_urb().
+ */
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (is_write)
+			usb_kill_urb(dev->write_urb);
+		else
+			usb_kill_urb(dev->read_or_intr_urb);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		usb_kill_urb(dev->boot_urb);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		break;
+	}
+}
+
+/*
+ * rshim_usb_probe() - probe handler, called once per matching interface
+ * (rshim boot interface: subclass 0; tmfifo interface: subclass 1).
+ * A single struct rshim_usb is shared by both interfaces; it is found
+ * (or created) by device pathname via rshim_find().
+ */
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		bd = &dev->bd;
+		/* bd takes over ownership of usb_dev_name here. */
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	/* NOTE(review): "== NULL" would be clearer than "== 0" for the
+	 * pointer checks below.
+	 */
+	allocfail |= bd->read_buf == 0;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= dev->intr_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->read_or_intr_urb == 0;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->write_urb == 0;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			/*
+			 * NOTE(review): bd->mutex (taken above) is still
+			 * held on this error path and never released --
+			 * confirm and fix in a follow-up.
+			 */
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	if (dev) {
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	/*
+	 * NOTE(review): for a newly created dev, bd->dev_name still points
+	 * at usb_dev_name freed here; ensure the backend teardown does not
+	 * free it again (potential double free / dangling pointer).
+	 */
+	kfree(usb_dev_name);
+	return retval;
+}
+
+/*
+ * rshim_usb_disconnect() - USB core callback when one of our bound
+ * interfaces goes away (device unplug or driver unbind).
+ *
+ * Called once per interface.  Tears down the state tied to that
+ * interface, and when neither the rshim nor the tmfifo interface
+ * remains, releases the USB device.  Finally drops one reference on
+ * the shared backend.
+ */
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;
+
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		/* Kill in-flight URBs before freeing them. */
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	if (!bd->has_rshim && !bd->has_tm) {
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+/* USB driver glue: probe/disconnect entry points for matching devices. */
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+/* Module init: register this driver with the USB subsystem. */
+static int __init rshim_usb_init(void)
+{
+	int result;
+
+	/* Register this driver with the USB subsystem. */
+	result = usb_register(&rshim_usb_driver);
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: deregister this driver from the USB subsystem. */
+static void __exit rshim_usb_exit(void)
+{
+	/* Deregister this driver with the USB subsystem. */
+	usb_deregister(&rshim_usb_driver);
+}
+
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe backend driver
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe backend driver to access the Rshim
interface on the BlueField SoC, such as on the Smart NIC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile     |   2 +-
 drivers/soc/mellanox/host/rshim_pcie.c | 478 +++++++++++++++++++++++++++++++++
 2 files changed, 479 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index c6703cd..fa4b21c 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RShim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR0 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size of the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Per-device state for one BlueField rshim PCIe function. */
+struct rshim_pcie {
+	/* RShim backend structure; must be first (container_of is used). */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/*
+ * Wait until the RSH_BYTE_ACC_CTL pending bit is cleared.
+ *
+ * Busy-polls the control register; a pending signal aborts the wait
+ * with -EINTR.  NOTE(review): there is no timeout here, so a wedged
+ * device keeps the CPU spinning until a signal arrives -- consider a
+ * bounded retry count.
+ */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read/write methods for 32-bit systems.
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/*
+	 * Read the first 32-bit word from RSH_BYTE_ACC_RDAT.  It is placed
+	 * in the upper half of the 64-bit temporary; the be64_to_cpu()
+	 * below puts the bytes into CPU order.
+	 */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read the second 32-bit word from RSH_BYTE_ACC_RDAT (lower half). */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * Perform an 8-byte RShim write as two 4-byte accesses through the
+ * Byte Access Widget (32-bit hosts only).
+ */
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/*
+	 * Write control bits to RSH_BYTE_ACC_CTL again.
+	 * NOTE(review): this repeats the control write above -- confirm
+	 * whether the widget needs re-arming before the data phase or
+	 * whether this is a leftover duplicate.
+	 */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write the upper 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write the lower 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/* RShim read/write routines */
+
+/*
+ * rshim_pcie_read() - backend read callback; fetches the 8-byte
+ * register at (chan, addr).  Resets write_count because a completed
+ * read forces previously posted writes to drain (see the comment in
+ * rshim_pcie_write()).
+ */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+/*
+ * rshim_pcie_write() - backend write callback; posts an 8-byte write
+ * to (chan, addr), inserting a draining read after every 15
+ * consecutive writes (see comment below).
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		/* Draining read; the value itself is discarded. */
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+/*
+ * Final kref release: deregister the backend, detach the driver data
+ * pointer and free the device state.
+ */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/* Probe routine */
+
+/*
+ * rshim_pcie_probe() - called by the PCI core for each matching device.
+ *
+ * Reuses an existing backend of the same name if one is still around,
+ * otherwise allocates fresh driver state; then maps the RShim register
+ * window, registers the backend and signals attach.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	int index, retval, err = 0;
+	int new_dev = 0;
+	const int max_name_len = 20;
+
+	/* Find a free slot in the instance table. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Reuse an existing backend with this name, if any. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		new_dev = 1;
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFOs (only once; previously called twice here). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	/* Allocate the read/write buffers unless carried over. */
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!bd->read_buf || !bd->write_buf) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;	/* previously fell through with err == 0 */
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			err = retval;	/* previously returned 0 here */
+			goto rshim_map_failed;
+		}
+		/* Ownership of the name string moved to the backend. */
+		if (bd->dev_name == pcie_dev_name)
+			pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;	/* previously returned 0 here */
+		goto rshim_map_failed;
+	}
+
+	/* Free the scratch name if the backend didn't take it over. */
+	kfree(pcie_dev_name);
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	/* Don't leave dangling pointers behind once the device is freed. */
+	if (new_dev)
+		instances[index] = NULL;
+	if (bd->dev_name == pcie_dev_name)
+		bd->dev_name = NULL;
+	rshim_lock();
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 *
+	 * NOTE(review): the write below raises software interrupt SWINT3
+	 * through RSH_SWINT; presumably a SoC-side handler performs the
+	 * register reset described above -- confirm against firmware docs.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	/* Stop the console worker; flush it in case it was in flight. */
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	/*
+	 * Clear the freed pointers: if the backend object outlives the
+	 * kref_put() below and is reused by a later probe, the
+	 * !bd->read_buf / !bd->write_buf checks there must not see stale
+	 * pointers.
+	 */
+	dev->bd.read_buf = NULL;
+	dev->bd.write_buf = NULL;
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop the reference taken at probe time. */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* Devices this driver binds to; const per kernel convention. */
+static const struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+/* PCI driver glue: probe/remove entry points for matching devices. */
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module init: register the PCI driver. */
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unregister the PCI driver. */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe backend driver
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe backend driver to access the Rshim
interface on the BlueField SoC, such as on the Smart NIC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile     |   2 +-
 drivers/soc/mellanox/host/rshim_pcie.c | 478 +++++++++++++++++++++++++++++++++
 2 files changed, 479 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index c6703cd..fa4b21c 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RShim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR0 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size of the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read/write methods for 32-bit systems.
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/*
+	 * Read the first 32-bit word from RSH_BYTE_ACC_RDAT.  It is placed
+	 * in the upper half of the 64-bit temporary; the be64_to_cpu()
+	 * below puts the bytes into CPU order.
+	 */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read the second 32-bit word from RSH_BYTE_ACC_RDAT (lower half). */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * Perform an 8-byte RShim write as two 4-byte accesses through the
+ * Byte Access Widget (32-bit hosts only).
+ */
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/*
+	 * Write control bits to RSH_BYTE_ACC_CTL again.
+	 * NOTE(review): this repeats the control write above -- confirm
+	 * whether the widget needs re-arming before the data phase or
+	 * whether this is a leftover duplicate.
+	 */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write the upper 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write the lower 32 bits of the value to RSH_BYTE_ACC_WDAT */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/* Probe routine */
+
+/*
+ * rshim_pcie_probe() - called by the PCI core for each matching device.
+ *
+ * Reuses an existing backend of the same name if one is still around,
+ * otherwise allocates fresh driver state; then maps the RShim register
+ * window, registers the backend and signals attach.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	int index, retval, err = 0;
+	int new_dev = 0;
+	const int max_name_len = 20;
+
+	/* Find a free slot in the instance table. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Reuse an existing backend with this name, if any. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		new_dev = 1;
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFOs (only once; previously called twice here). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	/* Allocate the read/write buffers unless carried over. */
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!bd->read_buf || !bd->write_buf) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;	/* previously fell through with err == 0 */
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			err = retval;	/* previously returned 0 here */
+			goto rshim_map_failed;
+		}
+		/* Ownership of the name string moved to the backend. */
+		if (bd->dev_name == pcie_dev_name)
+			pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;	/* previously returned 0 here */
+		goto rshim_map_failed;
+	}
+
+	/* Free the scratch name if the backend didn't take it over. */
+	kfree(pcie_dev_name);
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	/* Don't leave dangling pointers behind once the device is freed. */
+	if (new_dev)
+		instances[index] = NULL;
+	if (bd->dev_name == pcie_dev_name)
+		bd->dev_name = NULL;
+	rshim_lock();
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 *
+	 * NOTE(review): the write below raises software interrupt SWINT3
+	 * through RSH_SWINT; presumably a SoC-side handler performs the
+	 * register reset described above -- confirm against firmware docs.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	/* Stop the console worker; flush it in case it was in flight. */
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	/*
+	 * Clear the freed pointers: if the backend object outlives the
+	 * kref_put() below and is reused by a later probe, the
+	 * !bd->read_buf / !bd->write_buf checks there must not see stale
+	 * pointers.
+	 */
+	dev->bd.read_buf = NULL;
+	dev->bd.write_buf = NULL;
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop the reference taken at probe time. */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* Devices this driver binds to; const per kernel convention. */
+static const struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish backend driver
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-03 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe live-fish backend driver to access the
Rshim interface on the BlueField SoC, such as on the Smart NIC.
Access through this interface is slow, so it is mainly useful in
live-fish mode, i.e. before the NIC firmware has been programmed.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile        |   2 +-
 drivers/soc/mellanox/host/rshim_pcie_lf.c | 695 ++++++++++++++++++++++++++++++
 2 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index fa4b21c..79a1c86 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Mellanox Address & Data Capabilities */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+/* Per-device state for one BlueField live-fish PCIe backend. */
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	/* Underlying PCI device; set once probe succeeds. */
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/* Mechanism to access the CR space using hidden PCI capabilities */
+/* Read one 32-bit CR-space word via the hidden address/data capability. */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int retval;
+
+	/*
+	 * Post the target offset to MELLANOX_ADDR with the LSB set,
+	 * which marks the request as a read.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset | MELLANOX_CAP_READ);
+	if (retval)
+		return retval;
+
+	/* The device latches the result into MELLANOX_DATA; fetch it. */
+	return pci_read_config_dword(pci_dev, MELLANOX_DATA, result);
+}
+
+/* Write one 32-bit CR-space word via the hidden address/data capability. */
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int retval;
+
+	/* Stage the data word in MELLANOX_DATA first. */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_DATA, value);
+	if (retval)
+		return retval;
+
+	/*
+	 * Then post the target offset to MELLANOX_ADDR; a clear LSB
+	 * marks the request as a write, committing the staged data.
+	 */
+	return pci_write_config_dword(pci_dev, MELLANOX_ADDR, offset);
+}
+
+/* Acquire and release the TRIO_CR_GW_LOCK. */
+/*
+ * Poll TRIO_CR_GW_LOCK until it is free, then claim it for this caller.
+ * Returns 0 on success, a config-space access error code, or -EINTR if
+ * a signal arrives while polling.
+ *
+ * NOTE(review): this busy-waits over PCI config reads with no timeout
+ * or cpu_relax(); acceptable only if holders release quickly -- confirm.
+ */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	/* Wait until TRIO_CR_GW_LOCK is free */
+	do {
+		retval = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK,
+				&read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_ACQUIRED);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Drop TRIO_CR_GW_LOCK so other agents may use the gateway. */
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	/* Writing the release value frees the lock. */
+	return pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_RELEASE);
+}
+
+/*
+ * Mechanism to access the RShim from the CR space using the
+ * TRIO_CR_GATEWAY.
+ */
+/*
+ * Read a 32-bit RShim word through the TRIO_CR_GATEWAY.
+ *
+ * Serializes on TRIO_CR_GW_LOCK. Fix vs. original: the lock is now
+ * released on intermediate failures instead of being leaked, which
+ * previously could stall every subsequent gateway access.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		goto unlock;
+
+	/* Select a 4-byte gateway read. */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		goto unlock;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		goto unlock;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+	if (retval)
+		goto unlock;
+
+	/* Release TRIO_CR_GW_LOCK */
+	return trio_cr_gw_lock_release(pci_dev);
+
+unlock:
+	/* Best-effort release; report the original failure. */
+	trio_cr_gw_lock_release(pci_dev);
+	return retval;
+}
+
+/*
+ * Write a 32-bit RShim word through the TRIO_CR_GATEWAY.
+ *
+ * Serializes on TRIO_CR_GW_LOCK. Fix vs. original: the lock is now
+ * released on intermediate failures instead of being leaked.
+ */
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		goto unlock;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		goto unlock;
+
+	/* Select a 4-byte gateway write. */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		goto unlock;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		goto unlock;
+
+	/* Release TRIO_CR_GW_LOCK */
+	return trio_cr_gw_lock_release(pci_dev);
+
+unlock:
+	/* Best-effort release; report the original failure. */
+	trio_cr_gw_lock_release(pci_dev);
+	return retval;
+}
+
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared. */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+		/*
+		 * Test only the PENDING bit. The original also folded in
+		 * RSH_CHANNEL1_BASE (an address, not a bit mask), which
+		 * could keep this loop spinning on unrelated CTL bits.
+		 */
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+/* Read 8 bytes from the RShim via two 4-byte Byte Access Widget reads. */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read lower 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	/*
+	 * The first half is parked in bits 63:32; the be64_to_cpu() below
+	 * swaps everything back into CPU byte order -- presumably the
+	 * widget returns data big-endian. Confirm against the widget spec.
+	 */
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read upper 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * Write an 8-byte RShim word as two 4-byte accesses through the
+ * Rshim Byte Access Widget.
+ *
+ * Note: the caller has already byte-swapped @value, so bits 63:32
+ * here carry the low half of the original value.
+ *
+ * Fix vs. original: a duplicated write of RSH_BYTE_ACC_SIZE to
+ * RSH_BYTE_ACC_CTL (issued twice back to back) has been dropped.
+ */
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write the first data half to RSH_BYTE_ACC_WDAT. */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write the second data half to RSH_BYTE_ACC_WDAT. */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/*
+	 * @value was byte-swapped by the caller, so bits 63:32 hold the
+	 * low half of the original word -- hence the shift below despite
+	 * the "lower 32 bits" wording. Confirm against the boot FIFO
+	 * holding-register byte order.
+	 */
+	/* Write lower 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* RShim read/write routines */
+/* Read an 8-byte RShim register (installed as bd->read_rshim). */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* A read drains posted writes, so the write throttle can restart. */
+	dev->write_count = 0;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	/*
+	 * NOTE(review): byte-swapping an *address* is unusual; presumably
+	 * the byte-access widget expects the address in big-endian byte
+	 * order -- confirm against the RShim widget spec.
+	 */
+	addr = be32_to_cpu(addr);
+
+	retval = rshim_byte_acc_read(pci_dev, addr, result);
+
+	return retval;
+}
+
+/*
+ * Write an 8-byte RShim register (installed as bd->write_rshim).
+ * Boot-FIFO writes bypass the byte-access widget and keep the address
+ * unswapped; everything else goes through rshim_byte_acc_write().
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		/*
+		 * Best-effort drain read; its return value is deliberately
+		 * ignored (it also resets write_count to 0).
+		 */
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+/* kref release callback: tear down and free a PCIe backend instance. */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd = container_of(kref, struct rshim_backend,
+						kref);
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+
+	/* Unhook from the rshim core before the memory goes away. */
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/*
+ * Probe routine.
+ *
+ * Fixes vs. original:
+ *  - buffer-allocation, register, and notify failures now propagate a
+ *    negative errno (the old code jumped to cleanup with err still 0,
+ *    reporting success to the PCI core);
+ *  - rshim_fifo_alloc() is called once instead of twice;
+ *  - the instances[] slot is cleared on the error path so it cannot be
+ *    left pointing at freed memory after kref_put();
+ *  - the device name is freed on the re-attach success path instead of
+ *    being leaked;
+ *  - pointer checks use !ptr rather than comparing against 0.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	bool created = false;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	/* Find a free slot for this device. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+		created = true;
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the fifo and stream buffers (no-ops on a rebind). */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->read_buf;
+
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	allocfail |= !bd->write_buf;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			rshim_unlock();
+			err = retval;
+			goto register_failed;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;
+		goto register_failed;
+	}
+
+	/* On the re-attach path the backend keeps its original name. */
+	if (!created)
+		kfree(pcie_dev_name);
+
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	/* Drop the stale slot before the last ref can free the device. */
+	if (created)
+		instances[index] = NULL;
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	/*
+	 * rshim_pcie_delete() does not free bd->dev_name, so freeing the
+	 * name here is correct for both paths and never a double free.
+	 */
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 *
+	 * NOTE(review): the write below raises software interrupt SWINT3;
+	 * presumably the on-chip handler performs the reset described
+	 * above -- confirm against the firmware behavior.
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	/* If the delayed work was already running, drain it instead. */
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop the probe-time reference; last ref frees via destroy. */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	/* NOTE(review): the instances[] slot is not cleared here, so a
+	 * re-probe allocates a new slot/name -- verify this is intended. */
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* PCI IDs this driver binds to (BlueField in live-fish mode). */
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+/* Driver hooks; probe/remove are defined above in this file. */
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module entry point: register the PCI driver with the core. */
+static int __init rshim_pcie_init(void)
+{
+	int result = pci_register_driver(&rshim_pcie_driver);
+
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit: unregistering unbinds every device via rshim_pcie_remove(). */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish backend driver
@ 2019-01-03 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe live-fish backend driver to access the
Rshim interface on the BlueField SoC, such as on the Smart NIC.
It is slow access and can be used for live-fish mode when the NIC
firmware hasn't been programmed yet.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile        |   2 +-
 drivers/soc/mellanox/host/rshim_pcie_lf.c | 695 ++++++++++++++++++++++++++++++
 2 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index fa4b21c..79a1c86 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Mellanox Address & Data Capabilities */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/* Mechanism to access the CR space using hidden PCI capabilities */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Set LSB to indicate a read operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset | MELLANOX_CAP_READ);
+	if (retval)
+		return retval;
+
+	/* Read result from MELLANOX_DATA */
+	retval = pci_read_config_dword(pci_dev, MELLANOX_DATA,
+				result);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int retval;
+
+	/* Write data to MELLANOX_DATA */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_DATA,
+				value);
+	if (retval)
+		return retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Leave LSB clear to indicate a write operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Acquire and release the TRIO_CR_GW_LOCK. */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	/* Wait until TRIO_CR_GW_LOCK is free */
+	do {
+		retval = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK,
+				&read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_ACQUIRED);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	int retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_RELEASE);
+
+	return retval;
+}
+
+/*
+ * Mechanism to access the RShim from the CR space using the
+ * TRIO_CR_GATEWAY.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_READ_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_WRITE_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared. */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+		/*
+		 * Test only the PENDING bit. The original also folded in
+		 * RSH_CHANNEL1_BASE (an address, not a bit mask), which
+		 * could keep this loop spinning on unrelated CTL bits.
+		 */
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read lower 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read upper 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+/*
+ * Write an 8-byte RShim word as two 4-byte accesses through the
+ * Rshim Byte Access Widget.
+ *
+ * Note: the caller has already byte-swapped @value, so bits 63:32
+ * here carry the low half of the original value.
+ *
+ * Fix vs. original: a duplicated write of RSH_BYTE_ACC_SIZE to
+ * RSH_BYTE_ACC_CTL (issued twice back to back) has been dropped.
+ */
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write the first data half to RSH_BYTE_ACC_WDAT. */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write the second data half to RSH_BYTE_ACC_WDAT. */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Write lower 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	addr = be32_to_cpu(addr);
+
+	retval = rshim_byte_acc_read(pci_dev, addr, result);
+
+	return retval;
+}
+
+/*
+ * Write a 64-bit RShim register.
+ *
+ * Boot-FIFO writes (addr == RSH_BOOT_FIFO_DATA) go through
+ * rshim_boot_fifo_write() as two 4-byte writes and skip the address
+ * byte-swap; all other registers use the byte-access widget with the
+ * same be32_to_cpu() address transform as rshim_pcie_read().
+ *
+ * Returns 0 on success, -ENODEV if the rshim is gone, or the error
+ * from the underlying write routine.
+ */
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+/*
+ * Release callback invoked by kref_put() when the last reference to
+ * the backend drops: deregister the backend, detach the driver data
+ * from the PCI device (if still bound), and free the device state.
+ */
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/*
+ * Probe routine.
+ *
+ * Finds (or allocates) the rshim backend for this device, allocates
+ * the FIFOs and read/write buffers, enables the PCI device and
+ * registers the backend.  Returns 0 on success or a negative errno.
+ *
+ * Fixes vs. the original:
+ *  - rshim_fifo_alloc() was called twice; the redundant second call
+ *    is removed.
+ *  - On buffer-allocation failure the code jumped to enable_failed
+ *    with err still 0, so probe reported success; err is now set.
+ *  - Pointer checks use !ptr instead of comparing against 0.
+ */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	int index, retval, err = 0;
+	const int max_name_len = 20;
+
+	/* Find a free instance slot. */
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend; reuse it if it already exists. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	/* Allocate the FIFOs and the read/write buffers (idempotent). */
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		pr_err("Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	if (!bd->read_buf)
+		bd->read_buf = kzalloc(READ_BUF_SIZE, GFP_KERNEL);
+	if (!bd->write_buf)
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE, GFP_KERNEL);
+	if (!bd->read_buf || !bd->write_buf) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			rshim_unlock();
+			err = retval;
+			goto register_failed;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval) {
+		err = retval;
+		goto register_failed;
+	}
+
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Raise software interrupt 3 on the chip by writing
+	 * RSH_INT_VEC0_RTC__SWINT3_MASK to the RSH_SWINT register.
+	 * NOTE(review): the original comment here described resetting
+	 * TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE so stale
+	 * values would not break host MMIO after reboot, which does not
+	 * match this write -- confirm the intended semantics (possibly the
+	 * SWINT3 handler on the SoC performs that reset).
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	/* Detach, stop pending console work, and free the FIFO buffers. */
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	/* Drop the probe's reference; last put frees via rshim_pcie_delete. */
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+/* PCI IDs matched by this driver. */
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+/* PCIe "livefish" (lf) backend driver registration. */
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+/* Module entry point: register the PCIe livefish backend driver. */
+static int __init rshim_pcie_init(void)
+{
+	int result = pci_register_driver(&rshim_pcie_driver);
+
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+/* Module exit point: unregister the driver; remove() runs per device. */
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-12-12 23:07     ` Matthias Brugger
@ 2019-01-03 19:20       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:20 UTC (permalink / raw)
  To: Matthias Brugger, y, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, mbrugger@suse.com >> Matthias Brugger,
	linux-arm-kernel

Thanks Matthias for the comments and sorry for the holiday delay.
The recommended changes have been posted in patch v7 1/9.

Thanks,
Liming

> -----Original Message-----
> From: Matthias Brugger <matthias.bgg@gmail.com>
> Sent: Wednesday, December 12, 2018 6:08 PM
> To: Liming Sun <lsun@mellanox.com>; y@bu-lab53.mtbu.labs.mlnx; Olof Johansson <olof@lixom.net>; Arnd Bergmann <arnd@arndb.de>;
> David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc <arm@kernel.org>
> Cc: devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org; mbrugger@suse.com >> Matthias Brugger <mbrugger@suse.com>
> Subject: Re: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
> 
> 
> 
> On 01/11/2018 17:23, Liming Sun wrote:> This commit adds the TmFifo driver for
> Mellanox BlueField Soc.
> > TmFifo is a shared FIFO which enables external host machine to
> > exchange data with the SoC via USB or PCIe. The driver is based on
> > virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  drivers/soc/Kconfig                |    1 +
> >  drivers/soc/Makefile               |    1 +
> >  drivers/soc/mellanox/Kconfig       |   18 +
> >  drivers/soc/mellanox/Makefile      |    5 +
> >  drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
> >  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
> >  6 files changed, 1337 insertions(+)
> >  create mode 100644 drivers/soc/mellanox/Kconfig
> >  create mode 100644 drivers/soc/mellanox/Makefile
> >  create mode 100644 drivers/soc/mellanox/tmfifo.c
> >  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
> >
> 
> [...]
> 
> > +
> > +/* Interrupt handler. */
> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > +{
> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> > +	struct tmfifo *fifo = dev_id - i;
> > +
> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > +		schedule_work(&fifo->work);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> [...]
> > +
> > +/* Probe the TMFIFO. */
> > +static int tmfifo_probe(struct platform_device *pdev)
> > +{
> > +	u64 ctl;
> > +	struct tmfifo *fifo;
> > +	struct resource *rx_res, *tx_res;
> > +	struct virtio_net_config net_config;
> > +	int i, ret;
> > +
> > +	/* Get the resource of the Rx & Tx FIFO. */
> > +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +	if (!rx_res || !tx_res) {
> > +		ret = -EINVAL;
> > +		goto err;
> > +	}
> > +
> > +	if (request_mem_region(rx_res->start,
> > +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> > +		ret = -EBUSY;
> > +		goto early_err;
> > +	}
> > +
> > +	if (request_mem_region(tx_res->start,
> > +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> > +		release_mem_region(rx_res->start, resource_size(rx_res));
> > +		ret = -EBUSY;
> > +		goto early_err;
> > +	}
> > +
> > +	ret = -ENOMEM;
> > +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> > +	if (!fifo)
> > +		goto err;
> > +
> > +	fifo->pdev = pdev;
> > +	platform_set_drvdata(pdev, fifo);
> > +
> > +	spin_lock_init(&fifo->spin_lock);
> > +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> > +
> > +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> > +	fifo->timer.function = tmfifo_timer;
> > +
> > +	for (i = 0; i < TM_IRQ_CNT; i++) {
> > +		fifo->irq[i] = platform_get_irq(pdev, i);
> > +		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
> > +				  "tmfifo", (u8 *)fifo + i);
> 
> I think it would be better if you create a struct that passes a pointer to fifo
> and the ID instead of "hiding" the ID inside the address.
> 
> Regards,
> Matthias

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2019-01-03 19:20       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-03 19:20 UTC (permalink / raw)
  To: Matthias Brugger, y, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, mbrugger@suse.com >> Matthias Brugger,
	linux-arm-kernel

Thanks Matthias for the comments and sorry for the holiday delay.
The recommended changes have been posted in patch v7 1/9.

Thanks,
Liming

> -----Original Message-----
> From: Matthias Brugger <matthias.bgg@gmail.com>
> Sent: Wednesday, December 12, 2018 6:08 PM
> To: Liming Sun <lsun@mellanox.com>; y@bu-lab53.mtbu.labs.mlnx; Olof Johansson <olof@lixom.net>; Arnd Bergmann <arnd@arndb.de>;
> David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc <arm@kernel.org>
> Cc: devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org; mbrugger@suse.com >> Matthias Brugger <mbrugger@suse.com>
> Subject: Re: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
> 
> 
> 
> On 01/11/2018 17:23, Liming Sun wrote:> This commit adds the TmFifo driver for
> Mellanox BlueField Soc.
> > TmFifo is a shared FIFO which enables external host machine to
> > exchange data with the SoC via USB or PCIe. The driver is based on
> > virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  drivers/soc/Kconfig                |    1 +
> >  drivers/soc/Makefile               |    1 +
> >  drivers/soc/mellanox/Kconfig       |   18 +
> >  drivers/soc/mellanox/Makefile      |    5 +
> >  drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
> >  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
> >  6 files changed, 1337 insertions(+)
> >  create mode 100644 drivers/soc/mellanox/Kconfig
> >  create mode 100644 drivers/soc/mellanox/Makefile
> >  create mode 100644 drivers/soc/mellanox/tmfifo.c
> >  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
> >
> 
> [...]
> 
> > +
> > +/* Interrupt handler. */
> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > +{
> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> > +	struct tmfifo *fifo = dev_id - i;
> > +
> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > +		schedule_work(&fifo->work);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> [...]
> > +
> > +/* Probe the TMFIFO. */
> > +static int tmfifo_probe(struct platform_device *pdev)
> > +{
> > +	u64 ctl;
> > +	struct tmfifo *fifo;
> > +	struct resource *rx_res, *tx_res;
> > +	struct virtio_net_config net_config;
> > +	int i, ret;
> > +
> > +	/* Get the resource of the Rx & Tx FIFO. */
> > +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +	if (!rx_res || !tx_res) {
> > +		ret = -EINVAL;
> > +		goto err;
> > +	}
> > +
> > +	if (request_mem_region(rx_res->start,
> > +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> > +		ret = -EBUSY;
> > +		goto early_err;
> > +	}
> > +
> > +	if (request_mem_region(tx_res->start,
> > +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> > +		release_mem_region(rx_res->start, resource_size(rx_res));
> > +		ret = -EBUSY;
> > +		goto early_err;
> > +	}
> > +
> > +	ret = -ENOMEM;
> > +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> > +	if (!fifo)
> > +		goto err;
> > +
> > +	fifo->pdev = pdev;
> > +	platform_set_drvdata(pdev, fifo);
> > +
> > +	spin_lock_init(&fifo->spin_lock);
> > +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> > +
> > +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> > +	fifo->timer.function = tmfifo_timer;
> > +
> > +	for (i = 0; i < TM_IRQ_CNT; i++) {
> > +		fifo->irq[i] = platform_get_irq(pdev, i);
> > +		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
> > +				  "tmfifo", (u8 *)fifo + i);
> 
> I think it would be better if you create a struct that passes a pointer to fifo
> and the ID instead of "hiding" the ID inside the address.
> 
> Regards,
> Matthias
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2018-11-01 16:25   ` Liming Sun
  (?)
@ 2019-01-18 16:02     ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2019-01-18 16:02 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, linux-pci, Vincent Whitchurch, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM

On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>
> An external host can connect to a Mellanox BlueField SoC via an
> interface called Rshim. The Rshim driver provides boot, console,
> and networking services over this interface. This commit is
> the common driver where the other backend (transport) driver will
> use.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

Hi Liming,

I've taken a new look at your patch series for drivers/soc/ now,
thanks for your continued submissions.

This is again just a set of very high-level comments, but I think we
should resolve some of the fundamental questions first.
Incidentally, Vincent Whitchurch has recently posted another
patch series with a very similar intention, but for other hardware
and taking a different approach.

In both cases, the idea is to use virtio based drivers to provide
services from a host machine into another Linux instance running
on an embedded system behind a PCIe slot or similar. Your
Bluefield SoC patches are well written, but are intentionally
kept specific to a particular use case and tied to one piece
of hardware. In contrast, Vincent uses the existing code from
drivers/misc/mic/vop/ that is equally hardware specific, but he
extends it to be applicable to other hardware as well.

It would be good if you could look at each other's approaches
to see where we could take it from here. I think ideally we
should have a common driver framework for doing the same
thing across both of your devices and as well as others.

That would also resolve my main concern about the drivers,
which is the placement in drivers/soc/ for a set of drivers
that are unlike most drivers in that directory not mean for
running on the SoC itself in order drive unusual functionality
on the SoC, but are (at least partially) meant for running on
a host machine to communicate with that SoC over PCIe
or USB.

As an example, your network driver should really be placed
in drivers/net/, though it is unclear to me how it relates
to the existing virtio_net driver. In the case of mic/vop,
the idea is to use virtio_net on the device side, but have
vhost_net or a user space implementation on the host side,
but that is apparently not what you do here. Can you
explain why?

Another high-level question I have is on how your various
drivers relate to one another. This should normally be
explained in the 0/9 email, but I don't seem to have received
such a mail. I see that you have multiple back-end drivers
for the underlying transport, with one of them based on USB.
Have you come up with a way to use the same high-level
driver such as the network link over this USB back-end,
or is this for something else?

      Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-18 16:02     ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2019-01-18 16:02 UTC (permalink / raw)
  To: Liming Sun
  Cc: Olof Johansson, David Woods, Robin Murphy, arm-soc, DTML,
	Linux ARM, Vincent Whitchurch, linux-pci, linux-ntb,
	Christoph Hellwig

On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>
> An external host can connect to a Mellanox BlueField SoC via an
> interface called Rshim. The Rshim driver provides boot, console,
> and networking services over this interface. This commit is
> the common driver where the other backend (transport) driver will
> use.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

Hi Liming,

I've taken a new look at your patch series for drivers/soc/ now,
thanks for your continued submissions.

This is again just a set of very high-level comments, but I think we
should resolve some of the fundamental questions first.
Incidentally, Vincent Whitchurch has recently posted another
patch series with a very similar intention, but for other hardware
and taking a different approach.

In both cases, the idea is to use virtio based drivers to provide
services from a host machine into another Linux instance running
on an embedded system behind a PCIe slot or similar. Your
Bluefield SoC patches are well written, but are intentionally
kept specific to a particular use case and tied to one piece
of hardware. In contrast, Vincent uses the existing code from
drivers/misc/mic/vop/ that is equally hardware specific, but he
extends it to be applicable to other hardware as well.

It would be good if you could look at each other's approaches
to see where we could take it from here. I think ideally we
should have a common driver framework for doing the same
thing across both of your devices and as well as others.

That would also resolve my main concern about the drivers,
which is the placement in drivers/soc/ for a set of drivers
that are unlike most drivers in that directory not mean for
running on the SoC itself in order drive unusual functionality
on the SoC, but are (at least partially) meant for running on
a host machine to communicate with that SoC over PCIe
or USB.

As an example, your network driver should really be placed
in drivers/net/, though it is unclear to me how it relates
to the existing virtio_net driver. In the case of mic/vop,
the idea is to use virtio_net on the device side, but have
vhost_net or a user space implementation on the host side,
but that is apparently not what you do here. Can you
explain why?

Another high-level question I have is on how your various
drivers relate to one another. This should normally be
explained in the 0/9 email, but I don't seem to have received
such a mail. I see that you have multiple back-end drivers
for the underlying transport, with one of them based on USB.
Have you come up with a way to use the same high-level
driver such as the network link over this USB back-end,
or is this for something else?

      Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-18 16:02     ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2019-01-18 16:02 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, linux-pci, Vincent Whitchurch, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM

On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>
> An external host can connect to a Mellanox BlueField SoC via an
> interface called Rshim. The Rshim driver provides boot, console,
> and networking services over this interface. This commit is
> the common driver where the other backend (transport) driver will
> use.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

Hi Liming,

I've taken a new look at your patch series for drivers/soc/ now,
thanks for your continued submissions.

This is again just a set of very high-level comments, but I think we
should resolve some of the fundamental questions first.
Incidentally, Vincent Whitchurch has recently posted another
patch series with a very similar intention, but for other hardware
and taking a different approach.

In both cases, the idea is to use virtio based drivers to provide
services from a host machine into another Linux instance running
on an embedded system behind a PCIe slot or similar. Your
Bluefield SoC patches are well written, but are intentionally
kept specific to a particular use case and tied to one piece
of hardware. In contrast, Vincent uses the existing code from
drivers/misc/mic/vop/ that is equally hardware specific, but he
extends it to be applicable to other hardware as well.

It would be good if you could look at each other's approaches
to see where we could take it from here. I think ideally we
should have a common driver framework for doing the same
thing across both of your devices and as well as others.

That would also resolve my main concern about the drivers,
which is the placement in drivers/soc/ for a set of drivers
that are unlike most drivers in that directory not mean for
running on the SoC itself in order drive unusual functionality
on the SoC, but are (at least partially) meant for running on
a host machine to communicate with that SoC over PCIe
or USB.

As an example, your network driver should really be placed
in drivers/net/, though it is unclear to me how it relates
to the existing virtio_net driver. In the case of mic/vop,
the idea is to use virtio_net on the device side, but have
vhost_net or a user space implementation on the host side,
but that is apparently not what you do here. Can you
explain why?

Another high-level question I have is on how your various
drivers relate to one another. This should normally be
explained in the 0/9 email, but I don't seem to have received
such a mail. I see that you have multiple back-end drivers
for the underlying transport, with one of them based on USB.
Have you come up with a way to use the same high-level
driver such as the network link over this USB back-end,
or is this for something else?

      Arnd

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver
  2018-05-25 16:06 ` Liming Sun
@ 2019-01-21 19:17   ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-21 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This patch series implements Linux driver support for the Rshim
component on Mellanox BlueField ARM SoC.

1. Overview of the Rshim component

The Rshim component provides a connection among different other
components of the SoC and can be accessed externally via USB or
PCIe. USB access is available on standard appliance and SmartNIC.
PCIe access is for SmartNIC only.

From the user's perspective, Rshim provides the following
functionalities.

1.1 Boot FIFO

  The Boot FIFO is polled by the boot ROM. At a later booting
  phase it's read by UEFI as well to load the Linux kernel.

  The Boot FIFO is a one-way direction FIFO. The data could
  come from different sources.

    a) On-chip eMMC, which is the default configuration.

      After chip reset, HW will generate eMMC commands, which
      triggers the boot stream on the eMMC boot partition to show
      up in Boot FIFO.

      This one doesn't need any Linux driver.

    b) Injected externally from USB or PCIe.

      External host could overwrite the boot mode and inject
      boot stream into the Boot FIFO over USB or PCIe. It's
      used in board bring-up, fault recovering or debugging.

      This one usually needs driver support in the external host.

1.2 TmFifo

  TmFifo are FIFOs which can be used for communication between
  the SoC ARM core and the external side. Each direction has its
  own FIFO.

  TmFifo needs driver support on both the SoC side and the
  external host side.

1.3 Rshim register space access

  Rshim register space could be accessed externally via
  USB or PCIe. It's used for configuration, debugging, etc.
  More specifically, it could be used to access the ARM
  CoreSight component which could do on-chip debugging,
  such as for boot loader, drivers, or crash information
  collection.

  This one needs driver support from external host.

2. Driver Introduction

  This patch series provides several drivers.

  *) Patch 1/9 ~ 4/9 (tmfifo.c, tmfifo_regs.h, Kconfig, Makefile)

    They provide Linux driver support for the TmFifo on the
    SoC side (see above 1.2).

  *) Patch 5/9 ~ 9/9

    They provide Linux driver support for the external host.

    - Patch 5/9 (rshim.c, rshim.h, rshim_regs.h, Makefile)

      It provides common functionalities and register access
      to the RShim component. Several character devices are
      created like below for easy user-access.

      /dev/rshim<N>/boot 

        This device file is used to inject boot stream into
        the Boot FIFO, such as
              'cat boot-image > /dev/rshim<N>/boot'
        This one is to implement the above function 1.1.b

      /dev/rshim<N>/rshim

        This device file is used to access Rshim register
        space like a flat file.

      /dev/rshim<N>/console

        This device file is used to access the virtual
        console to the SoC via the TmFifo (see above
        functionality 1.2). It can be used by any standard tools
        like minicom, screen, etc.

    - Patch 6/9 (rshim_net.c)

      This patch provides virtual networking support over
      the Rshim TmFifo (see above functionality 1.2).

    - Patch 7/9 (rshim_usb.c)

      This is the USB backend driver, which implements the
      details to access the RShim component via USB. It supports
      endpoints, USB bulk-transfer and interrupts. After loaded
      by ACPI, it triggers the loading of the common driver
      and network driver, and provides low-level APIs to the
      common driver.

      Below is an example of the networking calling stack.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim USB backend driver

    - Patch 8/9 (rshim_pcie.c)

      This is the PCIe backend driver, which implements the
      details to access the RShim component via PCIe when
      the HCA firmware is ready. In such case, the Rshim component
      is exposed as a standard PCIe PF and it's register space
      can be mapped in BAR0.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim PCIe backend driver

   - Patch 9/9 (rshim_pcie_lf.c)

     Similarly, this is another PCIe backend driver, but provides
     slow access when the HCA firmware is not ready. In such case,
     the BAR0 mapping is not available. The Rshim register access
     is done via a slow access gateway. The below calling stack
     is also similar.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim PCIe livefish backend driver

Liming Sun (9):
  soc: Add TmFifo driver for Mellanox BlueField Soc
  arm64: Add Mellanox BlueField SoC config option
  dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  MAINTAINERS: Add entry for Mellanox Bluefield Soc
  soc: mellanox: host: Add the common host side Rshim driver
  soc: mellanox: host: Add networking support over Rshim
  soc: mellanox: host: Add the Rshim USB backend driver
  soc: mellanox: host: Add the Rshim PCIe backend driver
  soc: mellanox: host: Add the Rshim PCIe live-fish backend driver

 .../devicetree/bindings/soc/mellanox/tmfifo.txt    |   23 +
 MAINTAINERS                                        |    8 +
 arch/arm64/Kconfig.platforms                       |    6 +
 arch/arm64/configs/defconfig                       |    1 +
 drivers/soc/Kconfig                                |    1 +
 drivers/soc/Makefile                               |    1 +
 drivers/soc/mellanox/Kconfig                       |   26 +
 drivers/soc/mellanox/Makefile                      |    6 +
 drivers/soc/mellanox/host/Makefile                 |    2 +
 drivers/soc/mellanox/host/rshim.c                  | 2673 ++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h                  |  361 +++
 drivers/soc/mellanox/host/rshim_net.c              |  834 ++++++
 drivers/soc/mellanox/host/rshim_pcie.c             |  478 ++++
 drivers/soc/mellanox/host/rshim_pcie_lf.c          |  695 +++++
 drivers/soc/mellanox/host/rshim_regs.h             |  152 ++
 drivers/soc/mellanox/host/rshim_usb.c              | 1035 ++++++++
 drivers/soc/mellanox/tmfifo.c                      | 1244 +++++++++
 drivers/soc/mellanox/tmfifo_regs.h                 |   76 +
 18 files changed, 7622 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver
@ 2019-01-21 19:17   ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-21 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This patch series implements Linux driver support for the Rshim
component on Mellanox BlueField ARM SoC.

1. Overview of the Rshim component

The Rshim component provides a connection among different other
components of the SoC and can be accessed externally via USB or
PCIe. USB access is available on standard appliance and SmartNIC.
PCIe access is for SmartNIC only.

From user perspective, Rshim provides the following
functionalities.

1.1 Boot FIFO

  The Boot FIFO is polled by the boot ROM. At a later booting
  phase it's read by UEFI as well to load the Linux kernel.

  The Boot FIFO is a one-way direction FIFO. The data could
  come from different sources.

    a) On-chip eMMC, which is the default configuration.

      After chip reset, HW will generate eMMC commands, which
      triggers the boot stream on the eMMC boot partition to show
      up in Boot FIFO.

      This one doesn't need any Linux driver.

    b) Injected externally from USB or PCIe.

      External host could overwrite the boot mode and inject
      boot stream into the Boot FIFO over USB or PCIe. It's
      used in board bring-up, fault recovering or debugging.

      This one usually needs driver support in the external host.

1.2 TmFifo

  TmFifo are FIFOs which can be used for communication between
  the SoC ARM core and the external side. Each direction has its
  own FIFO.

  TmFifo needs driver support on both the SoC side and the
  external host side.

1.3 Rshim register space access

  Rshim register space could be accessed externally via
  USB or PCIe. It's used for configuration, debugging, etc.
  More specifically, it could be used to access the ARM
  CoreSight component which could do on-chip debugging,
  such as for boot loader, drivers, or crash information
  collection.

  This one needs driver support from external host.

2. Driver Introduction

  This patch series provides several drivers.

  *) Patch 1/9 ~ 4/9 (tmfifo.c, tmfifo_regs.h, Kconfig, Makefile)

    They provide Linux driver support for the TmFifo on the
    SoC side (see above 1.2).

  *) Patch 5/9 ~ 9/9

    They provide Linux driver support for the external host.

    - Patch 5/9 (rshim.c, rshim.h, rshim_regs.h, Makefile)

      It provides common functionalities and register access
      to the RShim component. Several character devices are
      created like below for easy user-access.

      /dev/rshim<N>/boot 

        This device file is used to inject boot stream into
        the Boot FIFO, such as
              'cat boot-image > /dev/rshim<N>/boot'
        This one is to implement the above function 1.1.b

      /dev/rshim<N>/rshim

        This device file is used to access Rshim register
        space like a flat file.

      /dev/rshim<N>/console

        This device file is used to access the virtual
        console to the SoC via the TmFifo (see above
        functionality 1.2). It can be used by any standard tools
        like minicom, screen, etc.

    - Patch 6/9 (rshim_net.c)

      This patch provides virtual networking support over
      the Rshim TmFifo (see above functionality 1.2).

    - Patch 7/9 (rshim_usb.c)

      This is the USB backend driver, which implements the
      details to access the RShim component via USB. It supports
      endpoints, USB bulk-transfer and interrupts. After loaded
      by ACPI, it triggers the loading of the common driver
      and network driver, and provides low-level APIs to the
      common driver.

      Below is an example of the networking calling stack.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim USB backend driver

    - Patch 8/9 (rshim_pcie.c)

      This is the PCIe backend driver, which implements the
      details to access the RShim component via PCIe when
      the HCA firmware is ready. In such case, the Rshim component
      is exposed as a standard PCIe PF and its register space
      can be mapped in BAR0.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim PCIe backend driver

   - Patch 9/9 (rshim_pcie_lf.c)

     Similarly, this is another PCIe backend driver, but provides
     slow access when the HCA firmware is not ready. In such a case,
     the BAR0 mapping is not available. The Rshim register access
     is done via a slow access gateway. The below calling stack
     is also similar.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim PCIe livefish backend driver

Liming Sun (9):
  soc: Add TmFifo driver for Mellanox BlueField Soc
  arm64: Add Mellanox BlueField SoC config option
  dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  MAINTAINERS: Add entry for Mellanox Bluefield Soc
  soc: mellanox: host: Add the common host side Rshim driver
  soc: mellanox: host: Add networking support over Rshim
  soc: mellanox: host: Add the Rshim USB backend driver
  soc: mellanox: host: Add the Rshim PCIe backend driver
  soc: mellanox: host: Add the Rshim PCIe live-fish backend driver

 .../devicetree/bindings/soc/mellanox/tmfifo.txt    |   23 +
 MAINTAINERS                                        |    8 +
 arch/arm64/Kconfig.platforms                       |    6 +
 arch/arm64/configs/defconfig                       |    1 +
 drivers/soc/Kconfig                                |    1 +
 drivers/soc/Makefile                               |    1 +
 drivers/soc/mellanox/Kconfig                       |   26 +
 drivers/soc/mellanox/Makefile                      |    6 +
 drivers/soc/mellanox/host/Makefile                 |    2 +
 drivers/soc/mellanox/host/rshim.c                  | 2673 ++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h                  |  361 +++
 drivers/soc/mellanox/host/rshim_net.c              |  834 ++++++
 drivers/soc/mellanox/host/rshim_pcie.c             |  478 ++++
 drivers/soc/mellanox/host/rshim_pcie_lf.c          |  695 +++++
 drivers/soc/mellanox/host/rshim_regs.h             |  152 ++
 drivers/soc/mellanox/host/rshim_usb.c              | 1035 ++++++++
 drivers/soc/mellanox/tmfifo.c                      | 1244 +++++++++
 drivers/soc/mellanox/tmfifo_regs.h                 |   76 +
 18 files changed, 7622 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2019-01-18 16:02     ` Arnd Bergmann
  (?)
@ 2019-01-21 19:22       ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-21 19:22 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, Vincent Whitchurch, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM

Thanks Arnd for the comments. The 0/9 email was sent out just now to
add more details about the design and changes. Please also see my response
below.

- Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, January 18, 2019 11:02 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; Vincent Whitchurch
> <vincent.whitchurch@axis.com>; linux-pci <linux-pci@vger.kernel.org>; linux-ntb@googlegroups.com; Christoph Hellwig <hch@lst.de>
> Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
> 
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices and as well as others.

Yes, I checked drivers/misc/mic/vop and Vincent Whitchurch's patches 
(Virtio-over-PCIe on non-MIC) and related comments. I kind of feel 
that besides the common virtio infrastructure, there seems not much
to be reused in the rest of implementation yet, though they are trying
to do the similar things.  (Feel free to correct me if I misunderstood it.)

I just submitted the patch 0/9 to explain some details of the rshim
component and the driver patches. Could you help take a look?

The rshim driver of BlueField SoC has a few more functionalities 
which are very HW-specific. Some needs driver support from both 
ARM target and the external host, some only needs external host 
driver support.

As for common framework, we used to implement the drivers based on
the remote proc (Documentation/remoteproc.txt), which seems more
close to what we wanted (in my humble opinion). Later due to more 
functionalities to add and the lack of remote proc in old kernels, we 
changed to use virtio framework directly, which seems very helpful and
saved quite some driver work.

> 
> That would also resolve my main concern about the drivers,
> which is the placement in drivers/soc/ for a set of drivers
> that are unlike most drivers in that directory not mean for
> running on the SoC itself in order drive unusual functionality
> on the SoC, but are (at least partially) meant for running on
> a host machine to communicate with that SoC over PCIe
> or USB.
> 
> As an example, your network driver should really be placed
> in drivers/net/, though it is unclear to me how it relates
> to the existing virtio_net driver. In the case of mic/vop,
> the idea is to use virtio_net on the device side, but have
> vhost_net or a user space implementation on the host side,
> but that is apparently not what you do here. Can you
> explain why?

Yes, I actually have the same concerns where the host side
drivers should go.  For now they're just added for code review
purpose. drivers/soc/ seems not a good place. One thought
is to move the rshim_net, rshim_pcie and rshim_pcie_lf backend
driver to drivers/net/ethernet/Mellanox/rshim/ and move the
rshim common driver to drivers/char as it creates the character
devices?

The device side of this patch uses the virtio_net driver as well. 

The host side is not just for networking, which was mentioned 
in the 0/9 patch. The host side driver manages the whole rshim
component and is called the 'rshim' driver. It includes driver
to access the TmFifo, where virtio_net is used to provide 
networking support. It needs to talk to the common
driver then the USB or PCIe backend driver.  It seems to me that
vhost_net doesn't quite fit this model and might make it 
over-complicated.

> 
> Another high-level question I have is on how your various
> drivers relate to one another. This should normally be
> explained in the 0/9 email, but I don't seem to have received
> such a mail. I see that you have multiple back-end drivers
> for the underlying transport, with one of them based on USB.
> Have you come up with a way to use the same high-level
> driver such as the network link over this USB back-end,
> or is this for something else?

Yes, 0/9 has been sent. Sorry, I should have provided it since the beginning.

The USB (or PCIe) provide the general transport to access the RShim
component, for networking, console, register access, boot service,
etc. So it's not just for network link. The implementation seems very
HW specific, such as providing APIs like rshim_usb_read_rshim()
and rshim_usb_write_rshim(). In PCIe backend it has similar APIs
like rshim_pcie_read(), rshim_pcie_write().

Not very clear about what you meant by "the same high-level driver 
such as the network link over this USB back-end". Do you mean using
any existing network over USB framework or provide some mechanism
to be reused by other network over USB driver?

By the way, the 0/9 has been sent. Could you help take a look whether 
it clarifies a little bit or not?

> 
>       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-21 19:22       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-21 19:22 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Olof Johansson, David Woods, Robin Murphy, arm-soc, DTML,
	Linux ARM, Vincent Whitchurch, linux-pci, linux-ntb,
	Christoph Hellwig

Thanks Arnd for the comments. The 0/9 email was sent out just now to
add more details about the design and changes. Please also see my response
below.

- Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, January 18, 2019 11:02 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; Vincent Whitchurch
> <vincent.whitchurch@axis.com>; linux-pci <linux-pci@vger.kernel.org>; linux-ntb@googlegroups.com; Christoph Hellwig <hch@lst.de>
> Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
> 
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices and as well as others.

Yes, I checked drivers/misc/mic/vop and Vincent Whitchurch's patches 
(Virtio-over-PCIe on non-MIC) and related comments. I kind of feel 
that besides the common virtio infrastructure, there seems not much
to be reused in the rest of implementation yet, though they are trying
to do the similar things.  (Feel free to correct me if I misunderstood it.)

I just submitted the patch 0/9 to explain some details of the rshim
component and the driver patches. Could you help take a look?

The rshim driver of BlueField SoC has a few more functionalities 
which are very HW-specific. Some needs driver support from both 
ARM target and the external host, some only needs external host 
driver support.

As for common framework, we used to implement the drivers based on
the remote proc (Documentation/remoteproc.txt), which seems more
close to what we wanted (in my humble opinion). Later due to more 
functionalities to add and the lack of remote proc in old kernels, we 
changed to use virtio framework directly, which seems very helpful and
saved quite some driver work.

> 
> That would also resolve my main concern about the drivers,
> which is the placement in drivers/soc/ for a set of drivers
> that are unlike most drivers in that directory not meant for
> running on the SoC itself in order to drive unusual functionality
> on the SoC, but are (at least partially) meant for running on
> a host machine to communicate with that SoC over PCIe
> or USB.
> 
> As an example, your network driver should really be placed
> in drivers/net/, though it is unclear to me how it relates
> to the existing virtio_net driver. In the case of mic/vop,
> the idea is to use virtio_net on the device side, but have
> vhost_net or a user space implementation on the host side,
> but that is apparently not what you do here. Can you
> explain why?

Yes, I actually have the same concerns where the host side
drivers should go.  For now they're just added for code review
purpose. drivers/soc/ seems not a good place. One thought
is to move the rshim_net, rshim_pcie and rshim_pcie_lf backend
driver to drivers/net/ethernet/Mellanox/rshim/ and move the
rshim common driver to drivers/char as it creates the character
devices?

The device side of this patch uses the virtio_net driver as well. 

The host side is not just for networking, which was mentioned 
in the 0/9 patch. The host side driver manages the whole rshim
component and is called the 'rshim' driver. It includes driver
to access the TmFifo, where virtio_net is used to provide 
networking support. It needs to talk to the common
driver then the USB or PCIe backend driver.  It seems to me that
vhost_net doesn't quite fit this model and might make it 
over-complicated.

> 
> Another high-level question I have is on how your various
> drivers relate to one another. This should normally be
> explained in the 0/9 email, but I don't seem to have received
> such a mail. I see that you have multiple back-end drivers
> for the underlying transport, with one of them based on USB.
> Have you come up with a way to use the same high-level
> driver such as the network link over this USB back-end,
> or is this for something else?

Yes, 0/9 has been sent. Sorry, I should have provided it since the beginning.

The USB (or PCIe) provide the general transport to access the RShim
component, for networking, console, register access, boot service,
etc. So it's not just for network link. The implementation seems very
HW specific, such as providing APIs like rshim_usb_read_rshim()
and rshim_usb_write_rshim(). In PCIe backend it has similar APIs
like rshim_pcie_read(), rshim_pcie_write().

Not very clear about what you meant by "the same high-level driver 
such as the network link over this USB back-end". Do you mean using
any existing network over USB framework or provide some mechanism
to be reused by other network over USB driver?

By the way, the 0/9 has been sent. Could you help take a look whether 
it clarifies a little bit or not?

> 
>       Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-21 19:22       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-21 19:22 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, Vincent Whitchurch, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM

Thanks Arnd for the comments. The 0/9 email was sent out just now to
add more details about the design and changes. Please also see my response
below.

- Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, January 18, 2019 11:02 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; Vincent Whitchurch
> <vincent.whitchurch@axis.com>; linux-pci <linux-pci@vger.kernel.org>; linux-ntb@googlegroups.com; Christoph Hellwig <hch@lst.de>
> Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
> 
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices and as well as others.

Yes, I checked drivers/misc/mic/vop and Vincent Whitchurch's patches 
(Virtio-over-PCIe on non-MIC) and related comments. I kind of feel 
that besides the common virtio infrastructure, there seems not much
to be reused in the rest of implementation yet, though they are trying
to do the similar things.  (Feel free to correct me if I misunderstood it.)

I just submitted the patch 0/9 to explain some details of the rshim
component and the driver patches. Could you help take a look?

The rshim driver of BlueField SoC has a few more functionalities 
which are very HW-specific. Some needs driver support from both 
ARM target and the external host, some only needs external host 
driver support.

As for common framework, we used to implement the drivers based on
the remote proc (Documentation/remoteproc.txt), which seems more
close to what we wanted (in my humble opinion). Later due to more 
functionalities to add and the lack of remote proc in old kernels, we 
changed to use virtio framework directly, which seems very helpful and
saved quite some driver work.

> 
> That would also resolve my main concern about the drivers,
> which is the placement in drivers/soc/ for a set of drivers
> that are unlike most drivers in that directory not meant for
> running on the SoC itself in order to drive unusual functionality
> on the SoC, but are (at least partially) meant for running on
> a host machine to communicate with that SoC over PCIe
> or USB.
> 
> As an example, your network driver should really be placed
> in drivers/net/, though it is unclear to me how it relates
> to the existing virtio_net driver. In the case of mic/vop,
> the idea is to use virtio_net on the device side, but have
> vhost_net or a user space implementation on the host side,
> but that is apparently not what you do here. Can you
> explain why?

Yes, I actually have the same concerns where the host side
drivers should go.  For now they're just added for code review
purpose. drivers/soc/ seems not a good place. One thought
is to move the rshim_net, rshim_pcie and rshim_pcie_lf backend
driver to drivers/net/ethernet/Mellanox/rshim/ and move the
rshim common driver to drivers/char as it creates the character
devices?

The device side of this patch uses the virtio_net driver as well. 

The host side is not just for networking, which was mentioned 
in the 0/9 patch. The host side driver manages the whole rshim
component and is called the 'rshim' driver. It includes driver
to access the TmFifo, where virtio_net is used to provide 
networking support. It needs to talk to the common
driver then the USB or PCIe backend driver.  It seems to me that
vhost_net doesn't quite fit this model and might make it 
over-complicated.

> 
> Another high-level question I have is on how your various
> drivers relate to one another. This should normally be
> explained in the 0/9 email, but I don't seem to have received
> such a mail. I see that you have multiple back-end drivers
> for the underlying transport, with one of them based on USB.
> Have you come up with a way to use the same high-level
> driver such as the network link over this USB back-end,
> or is this for something else?

Yes, 0/9 has been sent. Sorry, I should have provided it since the beginning.

The USB (or PCIe) provide the general transport to access the RShim
component, for networking, console, register access, boot service,
etc. So it's not just for network link. The implementation seems very
HW specific, such as providing APIs like rshim_usb_read_rshim()
and rshim_usb_write_rshim(). In PCIe backend it has similar APIs
like rshim_pcie_read(), rshim_pcie_write().

Not very clear about what you meant by "the same high-level driver 
such as the network link over this USB back-end". Do you mean using
any existing network over USB framework or provide some mechanism
to be reused by other network over USB driver?

By the way, the 0/9 has been sent. Could you help take a look whether 
it clarifies a little bit or not?

> 
>       Arnd
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2019-01-18 16:02     ` Arnd Bergmann
  (?)
@ 2019-01-22 12:20       ` Vincent Whitchurch
  -1 siblings, 0 replies; 179+ messages in thread
From: Vincent Whitchurch @ 2019-01-22 12:20 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, Liming Sun, virtualization,
	arm-soc, Olof Johansson, linux-ntb, Robin Murphy,
	Christoph Hellwig, Linux ARM

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices and as well as others.

As far as I can see the biggest difference is that Rshim appears to
support interfaces which do not have shared memory between the host and
the card, which means that it has to jump through a lot more hoops to
make virtio work.

For example, the card side seems to use normal virtio-net and
virto-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
also running on the card, appears to have to actually look inside the
virtqueues and shuffle the data over the TmFifo interface, and this
driver has hard-coded support for only network and console, since it
apparently needs to know the details of how the virtio drivers use their
virtqueues (see tmfifo_virtio_rxtx()).

And the host side appears to _also_ run the virtio-net driver and there
the drivers/soc/mellanox/host/rshim_net.c code instead has to look
inside the virtqueues and shuffle the data over the other side of the
TmFifo interface.

So to me this looks very different from a traditional virtio
driver/device setup (which is what mic/vop uses).  I may be missing
something, but I don't quite understand why it's even using virtio in
the first place.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-22 12:20       ` Vincent Whitchurch
  0 siblings, 0 replies; 179+ messages in thread
From: Vincent Whitchurch @ 2019-01-22 12:20 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Liming Sun, Olof Johansson, David Woods, Robin Murphy, arm-soc,
	DTML, Linux ARM, linux-pci, linux-ntb, Christoph Hellwig,
	virtualization

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices as well as others.

As far as I can see the biggest difference is that Rshim appears to
support interfaces which do not have shared memory between the host and
the card, which means that it has to jump through a lot more hoops to
make virtio work.

For example, the card side seems to use normal virtio-net and
virtio-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
also running on the card, appears to have to actually look inside the
virtqueues and shuffle the data over the TmFifo interface, and this
driver has hard-coded support for only network and console, since it
apparently needs to know the details of how the virtio drivers use their
virtqueues (see tmfifo_virtio_rxtx()).

And the host side appears to _also_ run the virtio-net driver and there
the drivers/soc/mellanox/host/rshim_net.c code instead has to look
inside the virtqueues and shuffle the data over the other side of the
TmFifo interface.

So to me this looks very different from a traditional virtio
driver/device setup (which is what mic/vop uses).  I may be missing
something, but I don't quite understand why it's even using virtio in
the first place.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-22 12:20       ` Vincent Whitchurch
  0 siblings, 0 replies; 179+ messages in thread
From: Vincent Whitchurch @ 2019-01-22 12:20 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, Liming Sun, virtualization,
	arm-soc, Olof Johansson, linux-ntb, Robin Murphy,
	Christoph Hellwig, Linux ARM

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices as well as others.

As far as I can see the biggest difference is that Rshim appears to
support interfaces which do not have shared memory between the host and
the card, which means that it has to jump through a lot more hoops to
make virtio work.

For example, the card side seems to use normal virtio-net and
virtio-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
also running on the card, appears to have to actually look inside the
virtqueues and shuffle the data over the TmFifo interface, and this
driver has hard-coded support for only network and console, since it
apparently needs to know the details of how the virtio drivers use their
virtqueues (see tmfifo_virtio_rxtx()).

And the host side appears to _also_ run the virtio-net driver and there
the drivers/soc/mellanox/host/rshim_net.c code instead has to look
inside the virtqueues and shuffle the data over the other side of the
TmFifo interface.

So to me this looks very different from a traditional virtio
driver/device setup (which is what mic/vop uses).  I may be missing
something, but I don't quite understand why it's even using virtio in
the first place.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2019-01-22 12:20       ` Vincent Whitchurch
  (?)
  (?)
@ 2019-01-22 13:27       ` Liming Sun
  2019-01-22 13:36           ` Liming Sun
  -1 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-01-22 13:27 UTC (permalink / raw)
  To: Vincent Whitchurch, Arnd Bergmann
  Cc: Olof Johansson, David Woods, Robin Murphy, arm-soc, DTML,
	Linux ARM, linux-pci, linux-ntb, Christoph Hellwig,
	virtualization

[-- Attachment #1: Type: text/plain, Size: 4365 bytes --]



________________________________
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Sent: Tuesday, January 22, 2019 7:20 AM
To: Arnd Bergmann
Cc: Liming Sun; Olof Johansson; David Woods; Robin Murphy; arm-soc; DTML; Linux ARM; linux-pci; linux-ntb@googlegroups.com; Christoph Hellwig; virtualization@lists.linux-foundation.org
Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
>> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>> >
>> > An external host can connect to a Mellanox BlueField SoC via an
>> > interface called Rshim. The Rshim driver provides boot, console,
>> > and networking services over this interface. This commit is
>> > the common driver where the other backend (transport) driver will
>> > use.
>> >
>> > Reviewed-by: David Woods <dwoods@mellanox.com>
>> > Signed-off-by: Liming Sun <lsun@mellanox.com>
>>
>> Hi Liming,
>>
>> I've taken a new look at your patch series for drivers/soc/ now,
>> thanks for your continued submissions.
>>
>> This is again just a set of very high-level comments, but I think we
>> should resolve some of the fundamental questions first.
>> Incidentally, Vincent Whitchurch has recently posted another
>> patch series with a very similar intention, but for other hardware
>> and taking a different approach.
>>
>> In both cases, the idea is to use virtio based drivers to provide
>> services from a host machine into another Linux instance running
>> on an embedded system behind a PCIe slot or similar. Your
>> Bluefield SoC patches are well written, but are intentionally
>> kept specific to a particular use case and tied to one piece
>> of hardware. In contrast, Vincent uses the existing code from
>> drivers/misc/mic/vop/ that is equally hardware specific, but he
>> extends it to be applicable to other hardware as well.
>>
>> It would be good if you could look at each other's approaches
>> to see where we could take it from here. I think ideally we
>> should have a common driver framework for doing the same
>> thing across both of your devices as well as others.

> As far as I can see the biggest difference is that Rshim appears to
> support interfaces which do not have shared memory between the host and
> the card, which means that it has to jump through a lot more hoops to
> make virtio work.

> For example, the card side seems to use normal virtio-net and
> virtio-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
> also running on the card, appears to have to actually look inside the
> virtqueues and shuffle the data over the TmFifo interface, and this
> driver has hard-coded support for only network and console, since it
> apparently needs to know the details of how the virtio drivers use their
> virtqueues (see tmfifo_virtio_rxtx()).

> And the host side appears to _also_ run the virtio-net driver and there
> the drivers/soc/mellanox/host/rshim_net.c code instead has to look
> inside the virtqueues and shuffle the data over the other side of the
> TmFifo interface.

> So to me this looks very different from a traditional virtio
> driver/device setup (which is what mic/vop uses).  I may be missing
> something, but I don't quite understand why it's even using virtio in
> the first place.

Thanks Vincent! This appears to be a very good summary of what this driver
does on the tmfifo part and the difference between mic/vop. The fifo is
accessed by register instead of shared memory.

The reason to use virtio framework is that it can be easily used to add
more virtual devices as needed without implementing driver details for
each one. For example, the device side supports console and networking
for now over the FIFO. It only needs to implement function
tmfifo_virtio_rxtx() once to take care of the virtqueues Rx/Tx, which are
shared by all virtual devices. With minimum changes, we could easily add
another device over tmfifo, like a virtio block device, since the queue
handling is already there.

The host side handles the virtqueues as well in rshim_net.c. It behaves
like a peer to the device side while the tmfifo behaves like a
'wire' (transport) to pass data between the host and the device without
worrying about the data details.

[-- Attachment #2: Type: text/html, Size: 10458 bytes --]

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2019-01-22 13:27       ` Liming Sun
  2019-01-22 13:36           ` Liming Sun
@ 2019-01-22 13:36           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-22 13:36 UTC (permalink / raw)
  To: Vincent Whitchurch, Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, virtualization, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM



From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Sent: Tuesday, January 22, 2019 7:20 AM
To: Arnd Bergmann
Cc: Liming Sun; Olof Johansson; David Woods; Robin Murphy; arm-soc; DTML; Linux ARM; linux-pci; linux-ntb@googlegroups.com; Christoph Hellwig; virtualization@lists.linux-foundation.org
Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
>> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>> >
>> > An external host can connect to a Mellanox BlueField SoC via an
>> > interface called Rshim. The Rshim driver provides boot, console,
>> > and networking services over this interface. This commit is
>> > the common driver where the other backend (transport) driver will
>> > use.
>> >
>> > Reviewed-by: David Woods <dwoods@mellanox.com>
>> > Signed-off-by: Liming Sun <lsun@mellanox.com>
>> 
>> Hi Liming,
>> 
>> I've taken a new look at your patch series for drivers/soc/ now,
>> thanks for your continued submissions.
>> 
>> This is again just a set of very high-level comments, but I think we
>> should resolve some of the fundamental questions first.
>> Incidentally, Vincent Whitchurch has recently posted another
>> patch series with a very similar intention, but for other hardware
>> and taking a different approach.
>> 
>> In both cases, the idea is to use virtio based drivers to provide
>> services from a host machine into another Linux instance running
>> on an embedded system behind a PCIe slot or similar. Your
>> Bluefield SoC patches are well written, but are intentionally
>> kept specific to a particular use case and tied to one piece
>> of hardware. In contrast, Vincent uses the existing code from
>> drivers/misc/mic/vop/ that is equally hardware specific, but he
>> extends it to be applicable to other hardware as well.
>> 
>> It would be good if you could look at each other's approaches
>> to see where we could take it from here. I think ideally we
>> should have a common driver framework for doing the same
>> thing across both of your devices as well as others.

> As far as I can see the biggest difference is that Rshim appears to
> support interfaces which do not have shared memory between the host and
> the card, which means that it has to jump through a lot more hoops to
> make virtio work.

> For example, the card side seems to use normal virtio-net and
> virtio-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
> also running on the card, appears to have to actually look inside the
> virtqueues and shuffle the data over the TmFifo interface, and this
> driver has hard-coded support for only network and console, since it
> apparently needs to know the details of how the virtio drivers use their
> virtqueues (see tmfifo_virtio_rxtx()).

> And the host side appears to _also_ run the virtio-net driver and there
> the drivers/soc/mellanox/host/rshim_net.c code instead has to look
> inside the virtqueues and shuffle the data over the other side of the
> TmFifo interface.

> So to me this looks very different from a traditional virtio
> driver/device setup (which is what mic/vop uses).  I may be missing
> something, but I don't quite understand why it's even using virtio in
> the first place.

Thanks Vincent! This appears to be a very good summary of what this driver
does on the tmfifo part and the difference between mic/vop. The fifo is
accessed by register instead of shared memory.

The reason to use virtio framework is that it can be easily used to add
more virtual devices as needed without implementing driver details for
each one. For example, the device side supports console and networking
for now over the FIFO. It only needs to implement function 
tmfifo_virtio_rxtx() once to take care of the virtqueues Rx/Tx, which are
shared by all virtual devices. With minimum changes, we could easily add
another device over tmfifo, like a virtio block device, since the queue
handling is already there. 

The host side handles the virtqueues as well in rshim_net.c. It behaves
like a peer to the device side while the tmfifo behaves like a
'wire' (transport) to pass data between the host and the device without
worrying about the data details.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-22 13:36           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-22 13:36 UTC (permalink / raw)
  To: Vincent Whitchurch, Arnd Bergmann
  Cc: Olof Johansson, David Woods, Robin Murphy, arm-soc, DTML,
	Linux ARM, linux-pci, linux-ntb, Christoph Hellwig,
	virtualization



From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Sent: Tuesday, January 22, 2019 7:20 AM
To: Arnd Bergmann
Cc: Liming Sun; Olof Johansson; David Woods; Robin Murphy; arm-soc; DTML; Linux ARM; linux-pci; linux-ntb@googlegroups.com; Christoph Hellwig; virtualization@lists.linux-foundation.org
Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
>> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>> >
>> > An external host can connect to a Mellanox BlueField SoC via an
>> > interface called Rshim. The Rshim driver provides boot, console,
>> > and networking services over this interface. This commit is
>> > the common driver where the other backend (transport) driver will
>> > use.
>> >
>> > Reviewed-by: David Woods <dwoods@mellanox.com>
>> > Signed-off-by: Liming Sun <lsun@mellanox.com>
>> 
>> Hi Liming,
>> 
>> I've taken a new look at your patch series for drivers/soc/ now,
>> thanks for your continued submissions.
>> 
>> This is again just a set of very high-level comments, but I think we
>> should resolve some of the fundamental questions first.
>> Incidentally, Vincent Whitchurch has recently posted another
>> patch series with a very similar intention, but for other hardware
>> and taking a different approach.
>> 
>> In both cases, the idea is to use virtio based drivers to provide
>> services from a host machine into another Linux instance running
>> on an embedded system behind a PCIe slot or similar. Your
>> Bluefield SoC patches are well written, but are intentionally
>> kept specific to a particular use case and tied to one piece
>> of hardware. In contrast, Vincent uses the existing code from
>> drivers/misc/mic/vop/ that is equally hardware specific, but he
>> extends it to be applicable to other hardware as well.
>> 
>> It would be good if you could look at each other's approaches
>> to see where we could take it from here. I think ideally we
>> should have a common driver framework for doing the same
>> thing across both of your devices as well as others.

> As far as I can see the biggest difference is that Rshim appears to
> support interfaces which do not have shared memory between the host and
> the card, which means that it has to jump through a lot more hoops to
> make virtio work.

> For example, the card side seems to use normal virtio-net and
> virtio-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
> also running on the card, appears to have to actually look inside the
> virtqueues and shuffle the data over the TmFifo interface, and this
> driver has hard-coded support for only network and console, since it
> apparently needs to know the details of how the virtio drivers use their
> virtqueues (see tmfifo_virtio_rxtx()).

> And the host side appears to _also_ run the virtio-net driver and there
> the drivers/soc/mellanox/host/rshim_net.c code instead has to look
> inside the virtqueues and shuffle the data over the other side of the
> TmFifo interface.

> So to me this looks very different from a traditional virtio
> driver/device setup (which is what mic/vop uses).  I may be missing
> something, but I don't quite understand why it's even using virtio in
> the first place.

Thanks Vincent! This appears to be a very good summary of what this driver
does on the tmfifo part and the difference between mic/vop. The fifo is
accessed by register instead of shared memory.

The reason to use virtio framework is that it can be easily used to add
more virtual devices as needed without implementing driver details for
each one. For example, the device side supports console and networking
for now over the FIFO. It only needs to implement function 
tmfifo_virtio_rxtx() once to take care of the virtqueues Rx/Tx, which are
shared by all virtual devices. With minimum changes, we could easily add
another device over tmfifo, like a virtio block device, since the queue
handling is already there. 

The host side handles the virtqueues as well in rshim_net.c. It behaves
like a peer to the device side while the tmfifo behaves like a
'wire' (transport) to pass data between the host and the device without
worrying about the data details.

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
@ 2019-01-22 13:36           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-22 13:36 UTC (permalink / raw)
  To: Vincent Whitchurch, Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, virtualization, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM



From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Sent: Tuesday, January 22, 2019 7:20 AM
To: Arnd Bergmann
Cc: Liming Sun; Olof Johansson; David Woods; Robin Murphy; arm-soc; DTML; Linux ARM; linux-pci; linux-ntb@googlegroups.com; Christoph Hellwig; virtualization@lists.linux-foundation.org
Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
>> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>> >
>> > An external host can connect to a Mellanox BlueField SoC via an
>> > interface called Rshim. The Rshim driver provides boot, console,
>> > and networking services over this interface. This commit is
>> > the common driver where the other backend (transport) driver will
>> > use.
>> >
>> > Reviewed-by: David Woods <dwoods@mellanox.com>
>> > Signed-off-by: Liming Sun <lsun@mellanox.com>
>> 
>> Hi Liming,
>> 
>> I've taken a new look at your patch series for drivers/soc/ now,
>> thanks for your continued submissions.
>> 
>> This is again just a set of very high-level comments, but I think we
>> should resolve some of the fundamental questions first.
>> Incidentally, Vincent Whitchurch has recently posted another
>> patch series with a very similar intention, but for other hardware
>> and taking a different approach.
>> 
>> In both cases, the idea is to use virtio based drivers to provide
>> services from a host machine into another Linux instance running
>> on an embedded system behind a PCIe slot or similar. Your
>> Bluefield SoC patches are well written, but are intentionally
>> kept specific to a particular use case and tied to one piece
>> of hardware. In contrast, Vincent uses the existing code from
>> drivers/misc/mic/vop/ that is equally hardware specific, but he
>> extends it to be applicable to other hardware as well.
>> 
>> It would be good if you could look at each other's approaches
>> to see where we could take it from here. I think ideally we
>> should have a common driver framework for doing the same
>> thing across both of your devices as well as others.

> As far as I can see the biggest difference is that Rshim appears to
> support interfaces which do not have shared memory between the host and
> the card, which means that it has to jump through a lot more hoops to
> make virtio work.

> For example, the card side seems to use normal virtio-net and
> virtio-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
> also running on the card, appears to have to actually look inside the
> virtqueues and shuffle the data over the TmFifo interface, and this
> driver has hard-coded support for only network and console, since it
> apparently needs to know the details of how the virtio drivers use their
> virtqueues (see tmfifo_virtio_rxtx()).

> And the host side appears to _also_ run the virtio-net driver and there
> the drivers/soc/mellanox/host/rshim_net.c code instead has to look
> inside the virtqueues and shuffle the data over the other side of the
> TmFifo interface.

> So to me this looks very different from a traditional virtio
> driver/device setup (which is what mic/vop uses).  I may be missing
> something, but I don't quite understand why it's even using virtio in
> the first place.

Thanks Vincent! This appears to be a very good summary of what this driver
does on the tmfifo part and the difference between mic/vop. The fifo is
accessed by register instead of shared memory.

The reason to use virtio framework is that it can be easily used to add
more virtual devices as needed without implementing driver details for
each one. For example, the device side supports console and networking
for now over the FIFO. It only needs to implement function 
tmfifo_virtio_rxtx() once to take care of the virtqueues Rx/Tx, which are
shared by all virtual devices. With minimum changes, we could easily add
another device over tmfifo, like a virtio block device, since the queue
handling is already there. 

The host side handles the virtqueues as well in rshim_net.c. It behaves
like a peer to the device side while the tmfifo behaves like a
'wire' (transport) to pass data between the host and the device without
worrying about the data details.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-26 20:33         ` Arnd Bergmann
@ 2019-01-24 15:07           ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-24 15:07 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, Vadim Pasternak, arm-soc, Andy Shevchenko,
	Olof Johansson, Darren Hart, Robin Murphy, Linux ARM

Arnd,

According to the emails and discussions I saw recently, I think that your comments "Finally, drivers/platform/mellanox might be a reasonable choice, and it would let you keep both sides of the driver in one place." does make more sense.

I'll try to resubmit the changes under the drivers/platform/mellanox directory.

Thanks!
Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, October 26, 2018 4:34 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
> 
> On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > > @@ -0,0 +1,23 @@
> > > > +* Mellanox BlueField SoC TmFifo
> > > > +
> > > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > > +external host machine, which can be accessed by external host via
> > > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > > +to implement virtual console and network interface based on the virtio
> > > > +framework.
> > > > +
> > > > +Required properties:
> > > > +
> > > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > > +- reg:             Physical base address and length of Rx/Tx block
> > > > +- interrupts:      The interrupt number of Rx low water mark, Rx high water
> > > > mark
> > > > +           Tx low water mark, Tx high water mark respectively.
> > >
> > >
> > > This sounds like it might fit into the mailbox subsystem, and perhaps
> > > it should use the mailbox DT bindings. Have you had a look at that?
> >
> > This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> > Like the response to patch 2/4, ACPI is actually used now instead of device tree.
> > The TMFIFO definition in the ACPI DSDT table would be something like below.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > Any suggestion how it should be added into Linux Documentation, or maybe I
> > should just remove this commit from this patch series?
> 
> Maybe the best way here would be to not use ACPI for the case
> where bluefin is integrated into a PCIe endpoint, since ACPI is
> not as flexible here and generally relies on having an SBSA
> compliant hardware that you no longer have if you require
> random platform devices for booting from and for your console.
> 
> For the case where a bluefin SoC is used in a standalone system,
> having ACPI makes more sense, as that lets you install Red Hat
> Linux or other operating systems that rely on SBBR and SBSA.
> 
> > As for the sub-component of this driver, the "soc" might be better fit than the mailbox
> > for some reasons. It's a communication between extern machines and the SoC via
> > USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features,
> > like pushing boot stream, doesn't communicate with the ARM core. The boot stream
> > is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.
> 
> Right, the drivers/mailbox subsystem was not the right idea here,
> I noticed that myself after actually reading the driver. Drivers/soc
> may also not be the best fit, since this is not really about it being
> a SoC, but rather a way to encapsulate virtual devices. The
> mic driver I mentioned is in drivers/misc, but I don't like to add stuff
> there if we can avoid it.
> 
> drivers/virtio, drivers/bus or drivers/mfd might also be an option that
> could fit better than drivers/soc, or you could have your own subdir
> below drivers/ as some others do. Finally, drivers/platform/mellanox
> might be a reasonable choice, and it would let you keep both sides
> of the driver in one place.
> 
>        Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
@ 2019-01-24 15:07           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-24 15:07 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, Vadim Pasternak, arm-soc, Andy Shevchenko,
	Olof Johansson, Darren Hart, Robin Murphy, Linux ARM

Arnd,

According to the emails and discussions I saw recently, I think that your comment "Finally, drivers/platform/mellanox might be a reasonable choice, and it would let you keep both sides of the driver in one place." makes more sense.

I'll try to resubmit the changes under the drivers/platform/mellanox directory.

Thanks!
Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, October 26, 2018 4:34 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
> 
> On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > > @@ -0,0 +1,23 @@
> > > > +* Mellanox BlueField SoC TmFifo
> > > > +
> > > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > > +external host machine, which can be accessed by external host via
> > > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > > +to implement virtual console and network interface based on the virtio
> > > > +framework.
> > > > +
> > > > +Required properties:
> > > > +
> > > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > > +- reg:             Physical base address and length of Rx/Tx block
> > > > +- interrupts:      The interrupt number of Rx low water mark, Rx high water
> > > > mark
> > > > +           Tx low water mark, Tx high water mark respectively.
> > >
> > >
> > > This sounds like it might fit into the mailbox subsystem, and perhaps
> > > it should use the mailbox DT bindings. Have you had a look at that?
> >
> > This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> > Like the response to patch 2/4, ACPI is actually used now instead of device tree.
> > The TMFIFO definition in the ACPI DSDT table would be something like below.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > Any suggestion how it should be added into Linux Documentation, or maybe I
> > should just remove this commit from this patch series?
> 
> Maybe the best way here would be to not use ACPI for the case
> where BlueField is integrated into a PCIe endpoint, since ACPI is
> not as flexible here and generally relies on having an SBSA
> compliant hardware that you no longer have if you require
> random platform devices for booting from and for your console.
> 
> For the case where a BlueField SoC is used in a standalone system,
> having ACPI makes more sense, as that lets you install Red Hat
> Linux or other operating systems that rely on SBBR and SBSA.
> 
> > As for the sub-component of this driver, the "soc" might be better fit than the mailbox
> > for some reasons. It's a communication between external machines and the SoC via
> > USB / PCIe, like pushing boot stream, console and network mgmt. Some of the features,
> > like pushing boot stream, don't communicate with the ARM core. The boot stream
> > is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.
> 
> Right, the drivers/mailbox subsystem was not the right idea here,
> I noticed that myself after actually reading the driver. Drivers/soc
> may also not be the best fit, since this is not really about it being
> a SoC, but rather a way to encapsulate virtual devices. The
> mic driver I mentioned is in drivers/misc, but I don't like to add stuff
> there if we can avoid it.
> 
> drivers/virtio, drivers/bus or drivers/mfd might also be an option that
> could fit better than drivers/soc, or you could have your own subdir
> below drivers/ as some others do. Finally, drivers/platform/mellanox
> might be a reasonable choice, and it would let you keep both sides
> of the driver in one place.
> 
>        Arnd
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 0/2] TmFifo platform driver for Mellanox BlueField SoC
  2018-05-25 16:06 ` Liming Sun
                   ` (41 preceding siblings ...)
  (?)
@ 2019-01-28 17:28 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-28 17:28 UTC (permalink / raw)
  To: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, devicetree, linux-kernel, platform-driver-x86

This patch series implements the device side platform driver
support for the TmFifo on Mellanox BlueField SoC.

TmFifo is part of the RShim component. It provides FIFOs to
communicate with external host machine via USB or PCIe (SmartNic
case). External host machine has driver to access the RShim
component as well, which is not covered in this patch series.

This patch series was submitted to drivers/soc in previous versions.
This version (v8) re-submits it to drivers/platform according to
the received comments / suggestions.

Patch v8 1/2 has changes according to some comments from Vadim
Pasternak during Mellanox internal review.

Patch v8 2/2 was reviewed by Rob Herring before, but might need
a second look since the location of the driver code is moved.

Liming Sun (2):
  platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC

 .../devicetree/bindings/soc/mellanox/tmfifo.txt    |   23 +
 drivers/platform/mellanox/Kconfig                  |   13 +-
 drivers/platform/mellanox/Makefile                 |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h      |   67 +
 drivers/platform/mellanox/mlxbf-tmfifo.c           | 1289 ++++++++++++++++++++
 5 files changed, 1392 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (42 preceding siblings ...)
  (?)
@ 2019-01-28 17:28 ` Liming Sun
  2019-01-29 22:06   ` Andy Shevchenko
  2019-01-30  6:24     ` Vadim Pasternak
  -1 siblings, 2 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-28 17:28 UTC (permalink / raw)
  To: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, devicetree, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
SoC. TmFifo is a shared FIFO which enables an external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/platform/mellanox/Kconfig             |   13 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   67 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1289 +++++++++++++++++++++++++
 4 files changed, 1369 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..a565070 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,15 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+          platform driver support for the TmFifo which supports console
+          and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..f0c061d 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -5,3 +5,4 @@
 #
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..90c9c2cf
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define MLXBF_TMFIFO_TX_DATA 0x0
+
+#define MLXBF_TMFIFO_TX_STS 0x8
+#define MLXBF_TMFIFO_TX_STS__LENGTH 0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT 0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH 9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define MLXBF_TMFIFO_TX_CTL 0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH 0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT 0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH 8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT 8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH 8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define MLXBF_TMFIFO_RX_DATA 0x0
+
+#define MLXBF_TMFIFO_RX_STS 0x8
+#define MLXBF_TMFIFO_RX_STS__LENGTH 0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT 0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH 9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define MLXBF_TMFIFO_RX_CTL 0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH 0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT 0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH 8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT 8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH 8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..c1afe47
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1289 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE		(32 * 1024)
+
+/* House-keeping timer interval. */
+static int mlxbf_tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static DEFINE_MUTEX(mlxbf_tmfifo_lock);
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Struct declaration. */
+struct mlxbf_tmfifo;
+
+/* Structure to maintain the ring state. */
+struct mlxbf_tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct mlxbf_tmfifo *fifo;	/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,		/* Rx low water mark irq */
+	MLXBF_TM_RX_HWM_IRQ,		/* Rx high water mark irq */
+	MLXBF_TM_TX_LWM_IRQ,		/* Tx low water mark irq */
+	MLXBF_TM_TX_HWM_IRQ,		/* Tx high water mark irq */
+	MLXBF_TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	MLXBF_TMFIFO_VRING_RX,		/* Rx ring */
+	MLXBF_TMFIFO_VRING_TX,		/* Tx ring */
+	MLXBF_TMFIFO_VRING_NUM
+};
+
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;	/* tmfifo structure */
+	int irq;			/* interrupt number */
+	int index;			/* array index */
+};
+
+/* TMFIFO device structure */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /* devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;		/* fifo lock */
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct mlxbf_tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[6] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define MLXBF_TMFIFO_NET_MTU		1500
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+					 (1UL << VIRTIO_NET_F_STATUS) | \
+					 (1UL << VIRTIO_NET_F_MAC))
+
+/* Return the consumed Tx buffer space. */
+static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+	       (vdev->tx_tail - vdev->tx_head) :
+	       (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head + vdev->tx_tail));
+}
+
+/* Return the available Tx buffer space. */
+static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev)
+{
+	return (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8 -
+		mlxbf_tmfifo_vdev_tx_buf_len(vdev));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static void mlxbf_tmfifo_vdev_tx_buf_push(struct mlxbf_tmfifo_vdev *vdev,
+					  u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static void mlxbf_tmfifo_vdev_tx_buf_pop(struct mlxbf_tmfifo_vdev *vdev,
+					 u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Allocate vrings for the fifo. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev,
+				     int vdev_id)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void mlxbf_tmfifo_free_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
+		irq = fifo->irq_info[i].irq;
+		if (irq) {
+			fifo->irq_info[i].irq = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info;
+
+	irq_info = (struct mlxbf_tmfifo_irq_info *)arg;
+
+	if (irq_info->index < MLXBF_TM_IRQ_CNT &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void mlxbf_tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+mlxbf_tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	unsigned int idx, head;
+	struct vring *vr;
+
+	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void mlxbf_tmfifo_virtio_release_desc(
+	struct virtio_device *vdev, struct vring *vr,
+	struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 mlxbf_tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+						  struct vring_desc *desc,
+						  struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void mlxbf_tmfifo_release_pkt(struct virtio_device *vdev,
+				     struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(
+				vdev, desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		mlxbf_tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void mlxbf_tmfifo_timer(struct timer_list *arg)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(arg, struct mlxbf_tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct virtqueue *vq)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct vring_desc *head_desc, *desc = NULL;
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, pkt_len, idx;
+	void *addr;
+
+	for (;;) {
+		head_desc = mlxbf_tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len > mlxbf_tmfifo_vdev_tx_buf_avail(cons)) {
+			mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
+							 pkt_len);
+			break;
+		}
+
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
+			    cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
+					cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			mlxbf_tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void mlxbf_tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	int num_avail = 0, hdr_len, tx_reserve;
+	struct mlxbf_tmfifo_vring *vring;
+	struct mlxbf_tmfifo_vdev *cons;
+	struct virtio_device *vdev;
+	struct mlxbf_tmfifo *fifo;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct vring *vr;
+	u64 sts, data;
+	u32 len, idx;
+	void *addr;
+
+	if (!vq)
+		return;
+
+	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base +
+					    MLXBF_TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base +
+					    MLXBF_TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						MLXBF_TMFIFO_TX_STS__COUNT_MASK,
+						sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			union mlxbf_tmfifo_msg_hdr hdr;
+			int size;
+
+			size = mlxbf_tmfifo_vdev_tx_buf_len(cons);
+			if (num_avail < 2 || size == 0)
+				return;
+			if (size + sizeof(hdr) > num_avail * sizeof(u64))
+				size = num_avail * sizeof(u64) - sizeof(hdr);
+			/* Write header. */
+			hdr.data = 0;
+			hdr.type = VIRTIO_ID_CONSOLE;
+			hdr.len = htons(size);
+			writeq(cpu_to_le64(hdr.data),
+			       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			while (size > 0) {
+				addr = cons->tx_buf + cons->tx_head;
+
+				if (cons->tx_head + sizeof(u64) <=
+				    MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
+					memcpy(&data, addr, sizeof(u64));
+				} else {
+					int partial;
+
+					partial =
+						MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
+						cons->tx_head;
+
+					memcpy(&data, addr, partial);
+					memcpy((u8 *)&data + partial,
+					       cons->tx_buf,
+					       sizeof(u64) - partial);
+				}
+				writeq(data,
+				       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+				if (size >= sizeof(u64)) {
+					mlxbf_tmfifo_vdev_tx_buf_pop(
+						cons, sizeof(u64));
+					size -= sizeof(u64);
+				} else {
+					mlxbf_tmfifo_vdev_tx_buf_pop(
+						cons, size);
+					size = 0;
+				}
+			}
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			return;
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head =
+				mlxbf_tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union mlxbf_tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 MLXBF_TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					struct virtio_net_config *config;
+
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+					config =
+					    &fifo->vdev[vdev_id]->config.net;
+					if (ntohs(hdr.len) > config->mtu +
+						MLXBF_TMFIFO_NET_L2_OVERHEAD)
+						continue;
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct mlxbf_tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring =
+					  &dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len =
+					mlxbf_tmfifo_virtio_get_pkt_len(
+						vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			mlxbf_tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Returns true so the virtio core treats the kick as delivered; the
+ * actual FIFO service happens in the worker (or inline for console Tx).
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			/* Copy console output under the spinlock, then let
+			 * the worker push it into the FIFO.
+			 */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			mlxbf_tmfifo_console_output(
+				fifo->vdev[VIRTIO_ID_CONSOLE], vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					     &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/*
+ * Work handler for Rx and Tx case.
+ *
+ * Runs with fifo->lock held so it cannot race with vdev creation or
+ * deletion.  A pending event bit is consumed only if the corresponding
+ * irq was successfully requested at probe time.
+ */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	int i;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	/* Nothing to do until probe has fully finished. */
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	if (test_and_clear_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq_info[MLXBF_TM_TX_LWM_IRQ].irq) {
+		/* Serve the Tx ring of every registered vdev. */
+		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				mlxbf_tmfifo_virtio_rxtx(
+				    tm_vdev->vrings[MLXBF_TMFIFO_VRING_TX].vq,
+				    false);
+			}
+		}
+	}
+
+	/* Rx (Receive data from the TmFifo). */
+	if (test_and_clear_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq_info[MLXBF_TM_RX_HWM_IRQ].irq) {
+		/* Serve the Rx ring of every registered vdev. */
+		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				mlxbf_tmfifo_virtio_rxtx(
+				    tm_vdev->vrings[MLXBF_TMFIFO_VRING_RX].vq,
+				    true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Return the feature bits advertised by this virtual device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	return tm_vdev->features;
+}
+
+/* Record the feature set negotiated by the virtio core. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Tear down every virtqueue created by find_vqs(). */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		struct mlxbf_tmfifo_vring *vring = &tm_vdev->vrings[i];
+
+		/* Drop any packet still in flight on this ring. */
+		if (vring->desc != NULL)
+			mlxbf_tmfifo_release_pkt(&tm_vdev->vdev, vring,
+						 &vring->desc);
+
+		/* Detach the virtqueue before freeing it. */
+		if (vring->vq != NULL) {
+			struct virtqueue *vq = vring->vq;
+
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * The backing vring memory (vring->va) was allocated when the vdev was
+ * created; this callback only zeroes it and wraps it in virtqueue
+ * objects.  On any failure all queues created so far are torn down via
+ * mlxbf_tmfifo_virtio_del_vqs().
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i, ret = -EINVAL, size;
+	struct virtqueue *vq;
+
+	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		/* A NULL name means the caller did not want this vq. */
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link vq and vring both ways for the notify/rxtx paths. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the last status byte written by the virtio core. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	return tm_vdev->status;
+}
+
+/* Store the status byte on behalf of the virtio core. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device: clearing the status byte is all that is needed. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Copy 'len' bytes at 'offset' of the config space into 'buf'. */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	/* Reject reads past the config space or with a wrapped offset+len. */
+	if (offset + len < len || offset + len > sizeof(tm_vdev->config)) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	/* Reject writes past the config space or with a wrapped offset+len. */
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		/* This is the set accessor; the message used to say "get". */
+		dev_err(vdev->dev.parent, "virtio_set access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Virtio config operations.
+ *
+ * Shared by every virtual device (console, network) registered on top
+ * of the TmFifo; the virtio core calls these for feature negotiation,
+ * queue setup/teardown, status and config-space access.
+ */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * @fifo:    the tmfifo the vdev attaches to
+ * @vdev_id: VIRTIO_ID_xxx identifying the device type
+ * @features: virtio feature bits to advertise
+ * @config:  optional initial config-space contents (may be NULL)
+ * @size:    byte count of @config
+ *
+ * Returns 0 on success or a negative errno.  Fixes vs. v1: the console
+ * tx_buf allocation is now checked, tx_buf is no longer leaked when
+ * register_virtio_device() fails, and dev_err() is used consistently.
+ */
+int mlxbf_tmfifo_create_vdev(struct mlxbf_tmfifo *fifo, int vdev_id,
+			     u64 features, void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		dev_err(&fifo->pdev->dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = mlxbf_tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		dev_err(&fifo->pdev->dev, "Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		/* Console Tx buffer; must be checked or the console path
+		 * would dereference NULL later on OOM.
+		 */
+		tm_vdev->tx_buf = kmalloc(MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto tx_buf_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	fifo->vdev[vdev_id] = NULL;
+	kfree(tm_vdev->tx_buf);
+tx_buf_fail:
+	mlxbf_tmfifo_free_vrings(fifo, vdev_id);
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo.  A no-op if the vdev does not exist. */
+int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (!tm_vdev) {
+		mutex_unlock(&fifo->lock);
+		return 0;
+	}
+
+	/* Unregister from the virtio core, then release all resources. */
+	unregister_virtio_device(&tm_vdev->vdev);
+	mlxbf_tmfifo_free_vrings(fifo, vdev_id);
+	kfree(tm_vdev->tx_buf);
+	kfree(tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Device remove function.
+ *
+ * Teardown order matters: mark the fifo not ready, stop the timer and
+ * irqs (the event sources), flush the worker, then remove the vdevs and
+ * release IO/memory resources.  Also called from the probe error path,
+ * so every step tolerates partially-initialized state.
+ */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+	int i;
+
+	if (fifo) {
+		mutex_lock(&mlxbf_tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		mlxbf_tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+			mlxbf_tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&mlxbf_tmfifo_lock);
+	}
+
+	/* Memory regions were requested in probe; give them back. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from the "RshimMacAddr"
+ * EFI variable, leaving the caller's default untouched if it is
+ * missing or has an unexpected length.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_char16_t name[] = {
+		'R', 's', 'h', 'i', 'm', 'M', 'a', 'c', 'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	u8 buf[6];
+	unsigned long size = sizeof(buf);
+	efi_status_t rc;
+
+	rc = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (rc != EFI_SUCCESS || size != sizeof(buf))
+		return;
+
+	memcpy(mac, buf, sizeof(buf));
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Fixes vs. v1: do not call remove() (which releases never-requested
+ * mem regions) when the resources are absent; check the negative
+ * return of platform_get_irq(); drop the redundant timer.function
+ * assignment (timer_setup() already sets it); make sure 'ret' holds a
+ * real errno on the ioremap failure paths (it used to be 0 there, so
+ * probe reported success after tearing everything down); use dev_err()
+ * consistently.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct resource *rx_res, *tx_res;
+	struct mlxbf_tmfifo *fifo;
+	int i, ret, irq;
+	u64 ctl;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		/* Nothing requested yet; do not go through remove(). */
+		ret = -EINVAL;
+		goto early_err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct mlxbf_tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+
+	/* timer_setup() installs the callback; no extra assignment needed. */
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
+		/* platform_get_irq() returns a negative errno on failure. */
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = irq;
+			goto err;
+		}
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		fifo->irq_info[i].irq = irq;
+		ret = request_irq(irq, mlxbf_tmfifo_irq_handler, 0,
+				  "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq\n");
+			fifo->irq_info[i].irq = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = MLXBF_TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_NET,
+		MLXBF_TMFIFO_NET_FEATURES, &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	mlxbf_tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+/* Device-tree match table. */
+static const struct of_device_id mlxbf_tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mlxbf_tmfifo_match);
+
+/* ACPI match table; "MLNXBF01" is the BlueField TmFifo ACPI ID. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+/* Platform driver glue; supports both DT and ACPI enumeration. */
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = mlxbf_tmfifo_match,
+		.acpi_match_table = ACPI_PTR(mlxbf_tmfifo_acpi_match),
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v8 2/2] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 16:06 ` Liming Sun
                   ` (43 preceding siblings ...)
  (?)
@ 2019-01-28 17:28 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-01-28 17:28 UTC (permalink / raw)
  To: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, devicetree, linux-kernel, platform-driver-x86

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark,
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-28 17:28 ` [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2019-01-29 22:06   ` Andy Shevchenko
  2019-02-13 13:34     ` Liming Sun
  2019-02-13 16:33     ` Liming Sun
  2019-01-30  6:24     ` Vadim Pasternak
  1 sibling, 2 replies; 179+ messages in thread
From: Andy Shevchenko @ 2019-01-29 22:06 UTC (permalink / raw)
  To: Liming Sun
  Cc: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart, Vadim Pasternak, devicetree,
	Linux Kernel Mailing List, Platform Driver

On Mon, Jan 28, 2019 at 7:28 PM Liming Sun <lsun@mellanox.com> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>


Please, go through this series taking into account review I just did
for your another patch.

On top of that, see recent (for few years I think) drivers what modern
APIs they are using, e.g. devm_.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-28 17:28 ` [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2019-01-30  6:24     ` Vadim Pasternak
  2019-01-30  6:24     ` Vadim Pasternak
  1 sibling, 0 replies; 179+ messages in thread
From: Vadim Pasternak @ 2019-01-30  6:24 UTC (permalink / raw)
  To: Liming Sun, Rob Herring, Mark Rutland, Arnd Bergmann,
	David Woods, Andy Shevchenko, Darren Hart
  Cc: Liming Sun, devicetree, linux-kernel, platform-driver-x86



> -----Original Message-----
> From: Liming Sun <lsun@mellanox.com>
> Sent: Monday, January 28, 2019 7:28 PM
> To: Rob Herring <robh+dt@kernel.org>; Mark Rutland
> <mark.rutland@arm.com>; Arnd Bergmann <arnd@arndb.de>; David Woods
> <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren
> Hart <dvhart@infradead.org>; Vadim Pasternak <vadimp@mellanox.com>
> Cc: Liming Sun <lsun@mellanox.com>; devicetree@vger.kernel.org; linux-
> kernel@vger.kernel.org; platform-driver-x86@vger.kernel.org
> Subject: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox
> BlueField Soc
> 
> This commit adds the TmFifo platform driver for Mellanox BlueField Soc. TmFifo
> is a shared FIFO which enables external host machine to exchange data with the
> SoC via USB or PCIe. The driver is based on virtio framework and has console
> and network access enabled.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/platform/mellanox/Kconfig             |   13 +-
>  drivers/platform/mellanox/Makefile            |    1 +
>  drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   67 ++
>  drivers/platform/mellanox/mlxbf-tmfifo.c      | 1289
> +++++++++++++++++++++++++
>  4 files changed, 1369 insertions(+), 1 deletion(-)  create mode 100644
> drivers/platform/mellanox/mlxbf-tmfifo-regs.h
>  create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c
> 
> diff --git a/drivers/platform/mellanox/Kconfig
> b/drivers/platform/mellanox/Kconfig
> index cd8a908..a565070 100644
> --- a/drivers/platform/mellanox/Kconfig
> +++ b/drivers/platform/mellanox/Kconfig
> @@ -5,7 +5,7 @@
> 
>  menuconfig MELLANOX_PLATFORM
>  	bool "Platform support for Mellanox hardware"
> -	depends on X86 || ARM || COMPILE_TEST
> +	depends on X86 || ARM || ARM64 || COMPILE_TEST
>  	---help---
>  	  Say Y here to get to see options for platform support for
>  	  Mellanox systems. This option alone does not add any kernel code.
> @@ -34,4 +34,15 @@ config MLXREG_IO
>  	  to system resets operation, system reset causes monitoring and some
>  	  kinds of mux selection.
> 
> +config MLXBF_TMFIFO
> +	tristate "Mellanox BlueField SoC TmFifo platform driver"
> +	depends on ARM64

Why do you make it dependent on ARM64?
Shouldn't it work on any host, e.g. x86?

> +	default m

User who needs it should select this option.
No need default 'm'.

> +	select VIRTIO_CONSOLE
> +	select VIRTIO_NET
> +	help
> +	  Say y here to enable TmFifo support. The TmFifo driver provides
> +          platform driver support for the TmFifo which supports console
> +          and networking based on the virtio framework.
> +
>  endif # MELLANOX_PLATFORM
> diff --git a/drivers/platform/mellanox/Makefile
> b/drivers/platform/mellanox/Makefile
> index 57074d9c..f0c061d 100644
> --- a/drivers/platform/mellanox/Makefile
> +++ b/drivers/platform/mellanox/Makefile
> @@ -5,3 +5,4 @@
>  #
>  obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
>  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> +obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> new file mode 100644
> index 0000000..90c9c2cf
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
> + */
> +
> +#ifndef __MLXBF_TMFIFO_REGS_H__
> +#define __MLXBF_TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +
> +#define MLXBF_TMFIFO_TX_DATA 0x0
> +
> +#define MLXBF_TMFIFO_TX_STS 0x8
> +#define MLXBF_TMFIFO_TX_STS__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_SHIFT 0 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_WIDTH 9 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL 0 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff #define
> +MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
> +
> +#define MLXBF_TMFIFO_TX_CTL 0x10
> +#define MLXBF_TMFIFO_TX_CTL__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_TX_CTL__LWM_SHIFT 0 #define
> MLXBF_TMFIFO_TX_CTL__LWM_WIDTH
> +8 #define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff #define
> +MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff #define
> +MLXBF_TMFIFO_TX_CTL__HWM_SHIFT 8 #define
> MLXBF_TMFIFO_TX_CTL__HWM_WIDTH
> +8 #define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff #define
> +MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#define MLXBF_TMFIFO_RX_DATA 0x0
> +
> +#define MLXBF_TMFIFO_RX_STS 0x8
> +#define MLXBF_TMFIFO_RX_STS__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_SHIFT 0 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_WIDTH 9 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL 0 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff #define
> +MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
> +
> +#define MLXBF_TMFIFO_RX_CTL 0x10
> +#define MLXBF_TMFIFO_RX_CTL__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_RX_CTL__LWM_SHIFT 0 #define
> MLXBF_TMFIFO_RX_CTL__LWM_WIDTH
> +8 #define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff #define
> +MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff #define
> +MLXBF_TMFIFO_RX_CTL__HWM_SHIFT 8 #define
> MLXBF_TMFIFO_RX_CTL__HWM_WIDTH
> +8 #define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff #define
> +MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> new file mode 100644
> index 0000000..c1afe47
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -0,0 +1,1289 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Mellanox BlueField SoC TmFifo driver
> + *
> + * Copyright (C) 2019 Mellanox Technologies  */
> +
> +#include <linux/acpi.h>
> +#include <linux/bitfield.h>
> +#include <linux/cache.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/efi.h>
> +#include <linux/io.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/kernel.h>
> +#include <linux/math64.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/resource.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/version.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <asm/byteorder.h>

Is it a must to include from asm?
Could it be replaced with something like
#include <linux/byteorder/generic.h>

> +
> +#include "mlxbf-tmfifo-regs.h"
> +
> +/* Vring size. */
> +#define MLXBF_TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE		(32 * 1024)
> +
> +/* House-keeping timer interval. */
> +static int mlxbf_tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static DEFINE_MUTEX(mlxbf_tmfifo_lock);
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Struct declaration. */
> +struct mlxbf_tmfifo;
> +
> +/* Structure to maintain the ring state. */ struct mlxbf_tmfifo_vring {
> +	void *va;			/* virtual address */
> +	dma_addr_t dma;			/* dma address */
> +	struct virtqueue *vq;		/* virtqueue pointer */
> +	struct vring_desc *desc;	/* current desc */
> +	struct vring_desc *desc_head;	/* current desc head */
> +	int cur_len;			/* processed len in current desc */
> +	int rem_len;			/* remaining length to be processed */
> +	int size;			/* vring size */
> +	int align;			/* vring alignment */
> +	int id;				/* vring id */
> +	int vdev_id;			/* TMFIFO_VDEV_xxx */
> +	u32 pkt_len;			/* packet total length */
> +	__virtio16 next_avail;		/* next avail desc id */
> +	struct mlxbf_tmfifo *fifo;	/* pointer back to the tmfifo */
> +};
> +
> +/* Interrupt types. */
> +enum {
> +	MLXBF_TM_RX_LWM_IRQ,		/* Rx low water mark irq */
> +	MLXBF_TM_RX_HWM_IRQ,		/* Rx high water mark irq */
> +	MLXBF_TM_TX_LWM_IRQ,		/* Tx low water mark irq */
> +	MLXBF_TM_TX_HWM_IRQ,		/* Tx high water mark irq */
> +	MLXBF_TM_IRQ_CNT
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +	MLXBF_TMFIFO_VRING_RX,		/* Rx ring */
> +	MLXBF_TMFIFO_VRING_TX,		/* Tx ring */
> +	MLXBF_TMFIFO_VRING_NUM
> +};
> +
> +struct mlxbf_tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};
> +
> +struct mlxbf_tmfifo_irq_info {
> +	struct mlxbf_tmfifo *fifo;	/* tmfifo structure */
> +	int irq;			/* interrupt number */
> +	int index;			/* array index */
> +};
> +
> +/* TMFIFO device structure */
> +struct mlxbf_tmfifo {
> +	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /*
> devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;		/* fifo lock */
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info
> */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct mlxbf_tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +	bool is_ready;			/* ready flag */
> +	spinlock_t spin_lock;		/* spin lock */
> +};
> +
> +union mlxbf_tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 mlxbf_tmfifo_net_default_mac[6] = {
> +	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> +
> +/* MTU setting of the virtio-net interface. */
> +#define MLXBF_TMFIFO_NET_MTU		1500
> +
> +/* Maximum L2 header length. */
> +#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
> +
> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU)
> | \
> +					 (1UL << VIRTIO_NET_F_STATUS) | \
> +					 (1UL << VIRTIO_NET_F_MAC))
> +
> +/* Return the consumed Tx buffer space. */ static int
> +mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev) {
> +	return ((vdev->tx_tail >= vdev->tx_head) ?
> +	       (vdev->tx_tail - vdev->tx_head) :
> +	       (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> +vdev->tx_tail)); }

I would suggest splitting the above into separate statements for readability.

> +
> +/* Return the available Tx buffer space. */ static int
> +mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev) {
> +	return (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8 -

Think about adding an extra define for
"MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8".

> +		mlxbf_tmfifo_vdev_tx_buf_len(vdev));
> +}
> +
> +/* Update Tx buffer pointer after pushing data. */ static void
> +mlxbf_tmfifo_vdev_tx_buf_push(struct mlxbf_tmfifo_vdev *vdev,
> +					  u32 len)
> +{
> +	vdev->tx_tail += len;
> +	if (vdev->tx_tail >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_tail -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE; }
> +
> +/* Update Tx buffer pointer after popping data. */ static void
> +mlxbf_tmfifo_vdev_tx_buf_pop(struct mlxbf_tmfifo_vdev *vdev,
> +					 u32 len)
> +{
> +	vdev->tx_head += len;
> +	if (vdev->tx_head >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_head -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE; }
> +
> +/* Allocate vrings for the fifo. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +				     struct mlxbf_tmfifo_vdev *tm_vdev,
> +				     int vdev_id)
> +{
> +	struct mlxbf_tmfifo_vring *vring;
> +	dma_addr_t dma;
> +	int i, size;
> +	void *va;
> +
> +	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +		vring = &tm_vdev->vrings[i];
> +		vring->fifo = fifo;
> +		vring->size = MLXBF_TMFIFO_VRING_SIZE;
> +		vring->align = SMP_CACHE_BYTES;
> +		vring->id = i;
> +		vring->vdev_id = vdev_id;
> +
> +		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> +		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size,
> &dma,
> +					GFP_KERNEL);
> +		if (!va) {
> +			dev_err(tm_vdev->vdev.dev.parent,
> +				"vring allocation failed\n");
> +			return -EINVAL;
> +		}
> +
> +		vring->va = va;
> +		vring->dma = dma;
> +	}
> +
> +	return 0;
> +}
> +
> +/* Free vrings of the fifo device. */
> +static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo, int
> +vdev_id) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
> +	struct mlxbf_tmfifo_vring *vring;
> +	int i, size;
> +
> +	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +		vring = &tm_vdev->vrings[i];
> +
> +		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> +		if (vring->va) {
> +			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
> +					  vring->va, vring->dma);
> +			vring->va = NULL;
> +			if (vring->vq) {
> +				vring_del_virtqueue(vring->vq);
> +				vring->vq = NULL;
> +			}
> +		}
> +	}
> +}
> +
> +/* Free interrupts of the fifo device. */ static void
> +mlxbf_tmfifo_free_irqs(struct mlxbf_tmfifo *fifo) {
> +	int i, irq;
> +
> +	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +		irq = fifo->irq_info[i].irq;
> +		if (irq) {
> +			fifo->irq_info[i].irq = 0;
> +			disable_irq(irq);
> +			free_irq(irq, (u8 *)fifo + i);
> +		}
> +	}
> +}
> +
> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg) {
> +	struct mlxbf_tmfifo_irq_info *irq_info;
> +
> +	irq_info = (struct mlxbf_tmfifo_irq_info *)arg;
> +
> +	if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> +	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> +		schedule_work(&irq_info->fifo->work);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +/* Nothing to do for now. */
> +static void mlxbf_tmfifo_virtio_dev_release(struct device *dev) { }

If there is nothing to do - no reason to have it.

> +
> +/* Get the next packet descriptor from the vring. */ static inline
> +struct vring_desc * mlxbf_tmfifo_virtio_get_next_desc(struct virtqueue
> +*vq) {
> +	struct mlxbf_tmfifo_vring *vring;
> +	unsigned int idx, head;
> +	struct vring *vr;
> +
> +	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!vr || vring->next_avail == vr->avail->idx)
> +		return NULL;
> +
> +	idx = vring->next_avail % vr->num;
> +	head = vr->avail->ring[idx];
> +	BUG_ON(head >= vr->num);
> +	vring->next_avail++;
> +	return &vr->desc[head];
> +}
> +
> +static inline void mlxbf_tmfifo_virtio_release_desc(
> +	struct virtio_device *vdev, struct vring *vr,
> +	struct vring_desc *desc, u32 len)
> +{
> +	unsigned int idx;
> +
> +	idx = vr->used->idx % vr->num;
> +	vr->used->ring[idx].id = desc - vr->desc;
> +	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> +
> +	/* Virtio could poll and check the 'idx' to decide
> +	 * whether the desc is done or not. Add a memory
> +	 * barrier here to make sure the update above completes
> +	 * before updating the idx.
> +	 */
> +	mb();
> +	vr->used->idx++;
> +}
> +
> +/* Get the total length of a descriptor chain. */ static inline u32
> +mlxbf_tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
> +						  struct vring_desc *desc,
> +						  struct vring *vr)
> +{
> +	u32 len = 0, idx;
> +
> +	while (desc) {
> +		len += virtio32_to_cpu(vdev, desc->len);
> +		if (!(virtio16_to_cpu(vdev, desc->flags) &
> VRING_DESC_F_NEXT))
> +			break;
> +		idx = virtio16_to_cpu(vdev, desc->next);
> +		desc = &vr->desc[idx];
> +	}
> +
> +	return len;
> +}
> +
> +static void mlxbf_tmfifo_release_pkt(struct virtio_device *vdev,
> +				     struct mlxbf_tmfifo_vring *vring,
> +				     struct vring_desc **desc)
> +{
> +	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
> +	struct vring_desc *desc_head;
> +	uint32_t pkt_len = 0;
> +
> +	if (!vr)
> +		return;
> +
> +	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
> +		desc_head = vring->desc_head;
> +		pkt_len = vring->pkt_len;
> +	} else {
> +		desc_head = mlxbf_tmfifo_virtio_get_next_desc(vring->vq);
> +		if (desc_head != NULL) {
> +			pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(
> +				vdev, desc_head, vr);
> +		}
> +	}
> +
> +	if (desc_head != NULL)
> +		mlxbf_tmfifo_virtio_release_desc(vdev, vr, desc_head,
> pkt_len);
> +
> +	if (desc != NULL)
> +		*desc = NULL;
> +	vring->pkt_len = 0;
> +}
> +
> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *arg) {
> +	struct mlxbf_tmfifo *fifo;
> +
> +	fifo = container_of(arg, struct mlxbf_tmfifo, timer);
> +
> +	/*
> +	 * Wake up the work handler to poll the Rx FIFO in case interrupt
> +	 * missing or any leftover bytes stuck in the FIFO.
> +	 */
> +	test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
> +
> +	/*
> +	 * Wake up Tx handler in case virtio has queued too many packets
> +	 * and are waiting for buffer return.
> +	 */
> +	test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> +
> +	schedule_work(&fifo->work);
> +
> +	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval); }
> +
> +/* Buffer the console output. */
> +static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
> +					struct virtqueue *vq)
> +{
> +	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
> +	struct vring_desc *head_desc, *desc = NULL;
> +	struct virtio_device *vdev = &cons->vdev;
> +	u32 len, pkt_len, idx;
> +	void *addr;
> +
> +	for (;;) {

It's better to modify it as while (some condition).

> +		head_desc = mlxbf_tmfifo_virtio_get_next_desc(vq);
> +		if (head_desc == NULL)
> +			break;
> +
> +		/* Release the packet if no more space. */
> +		pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(vdev, head_desc,
> vr);
> +		if (pkt_len > mlxbf_tmfifo_vdev_tx_buf_avail(cons)) {
> +			mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
> +							 pkt_len);

Why do you break the line here?

> +			break;
> +		}
> +
> +		desc = head_desc;
> +
> +		while (desc != NULL) {
> +			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +			len = virtio32_to_cpu(vdev, desc->len);
> +
> +			if (len <= MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +			    cons->tx_tail) {

Why do you break the line here? Also below I see a few strange line breaks.

> +				memcpy(cons->tx_buf + cons->tx_tail, addr,
> len);
> +			} else {
> +				u32 seg;
> +
> +				seg = MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +					cons->tx_tail;
> +				memcpy(cons->tx_buf + cons->tx_tail, addr,
> seg);
> +				addr += seg;
> +				memcpy(cons->tx_buf, addr, len - seg);
> +			}
> +			mlxbf_tmfifo_vdev_tx_buf_push(cons, len);
> +
> +			if (!(virtio16_to_cpu(vdev, desc->flags) &
> +			    VRING_DESC_F_NEXT))
> +				break;
> +			idx = virtio16_to_cpu(vdev, desc->next);
> +			desc = &vr->desc[idx];
> +		}
> +
> +		mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
> pkt_len);
> +	}
> +}
> +
> +/* Rx & Tx processing of a virtual queue. */ static void
> +mlxbf_tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx) {
> +	int num_avail = 0, hdr_len, tx_reserve;
> +	struct mlxbf_tmfifo_vring *vring;
> +	struct mlxbf_tmfifo_vdev *cons;
> +	struct virtio_device *vdev;
> +	struct mlxbf_tmfifo *fifo;
> +	struct vring_desc *desc;
> +	unsigned long flags;
> +	struct vring *vr;
> +	u64 sts, data;
> +	u32 len, idx;
> +	void *addr;
> +
> +	if (!vq)
> +		return;
> +
> +	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +	fifo = vring->fifo;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!fifo->vdev[vring->vdev_id])
> +		return;
> +	vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +
> +	/* Don't continue if another vring is running. */
> +	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
> +		return;
> +
> +	/* tx_reserve is used to reserved some room in FIFO for console. */
> +	if (vring->vdev_id == VIRTIO_ID_NET) {
> +		hdr_len = sizeof(struct virtio_net_hdr);
> +		tx_reserve = fifo->tx_fifo_size / 16;

Use some define instead of the magic number 16.

> +	} else {
> +		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
> +		hdr_len = 0;
> +		tx_reserve = 1;
> +	}
> +
> +	desc = vring->desc;
> +
> +	while (1) {

I see there are a few drivers in platform which use while (1),
but it looks better to use while (some condition)
and, instead of break, change that condition to false.

> +		/* Get available FIFO space. */
> +		if (num_avail == 0) {
> +			if (is_rx) {
> +				/* Get the number of available words in FIFO.
> */
> +				sts = readq(fifo->rx_base +
> +					    MLXBF_TMFIFO_RX_STS);
> +				num_avail = FIELD_GET(
> +
> 	MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);

				num_avail = FIELD_GET(TMFIFO_RX_STS__COUNT_MASK, sts);

> +
> +				/* Don't continue if nothing in FIFO. */
> +				if (num_avail <= 0)
> +					break;
> +			} else {
> +				/* Get available space in FIFO. */
> +				sts = readq(fifo->tx_base +
> +					    MLXBF_TMFIFO_TX_STS);
> +				num_avail = fifo->tx_fifo_size - tx_reserve -
> +					FIELD_GET(
> +
> 	MLXBF_TMFIFO_TX_STS__COUNT_MASK,
> +						sts);

Same as above.

> +
> +				if (num_avail <= 0)
> +					break;
> +			}
> +		}
> +
> +		/* Console output always comes from the Tx buffer. */
> +		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
> +		    cons != NULL && cons->tx_buf != NULL) {
> +			union mlxbf_tmfifo_msg_hdr hdr;
> +			int size;
> +
> +			size = mlxbf_tmfifo_vdev_tx_buf_len(cons);
> +			if (num_avail < 2 || size == 0)
> +				return;
> +			if (size + sizeof(hdr) > num_avail * sizeof(u64))
> +				size = num_avail * sizeof(u64) - sizeof(hdr);
> +			/* Write header. */
> +			hdr.data = 0;
> +			hdr.type = VIRTIO_ID_CONSOLE;
> +			hdr.len = htons(size);
> +			writeq(cpu_to_le64(hdr.data),
> +			       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +			spin_lock_irqsave(&fifo->spin_lock, flags);
> +			while (size > 0) {
> +				addr = cons->tx_buf + cons->tx_head;
> +
> +				if (cons->tx_head + sizeof(u64) <=
> +				    MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> +					memcpy(&data, addr, sizeof(u64));
> +				} else {
> +					int partial;
> +
> +					partial =
> +
> 	MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +						cons->tx_head;
> +
> +					memcpy(&data, addr, partial);
> +					memcpy((u8 *)&data + partial,
> +					       cons->tx_buf,
> +					       sizeof(u64) - partial);
> +				}
> +				writeq(data,
> +				       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +				if (size >= sizeof(u64)) {
> +					mlxbf_tmfifo_vdev_tx_buf_pop(
> +						cons, sizeof(u64));
> +					size -= sizeof(u64);
> +				} else {
> +					mlxbf_tmfifo_vdev_tx_buf_pop(
> +						cons, size);
> +					size = 0;
> +				}
> +			}
> +			spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +			return;
> +		}
> +
> +		/* Get the desc of next packet. */
> +		if (!desc) {
> +			/* Save the head desc of the chain. */
> +			vring->desc_head =
> +				mlxbf_tmfifo_virtio_get_next_desc(vq);
> +			if (!vring->desc_head) {
> +				vring->desc = NULL;
> +				return;
> +			}
> +			desc = vring->desc_head;
> +			vring->desc = desc;
> +
> +			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
> +				struct virtio_net_hdr *net_hdr;
> +
> +				/* Initialize the packet header. */
> +				net_hdr = (struct virtio_net_hdr *)
> +					phys_to_virt(virtio64_to_cpu(
> +						vdev, desc->addr));
> +				memset(net_hdr, 0, sizeof(*net_hdr));
> +			}
> +		}
> +
> +		/* Beginning of each packet. */
> +		if (vring->pkt_len == 0) {
> +			int vdev_id, vring_change = 0;
> +			union mlxbf_tmfifo_msg_hdr hdr;
> +
> +			num_avail--;
> +
> +			/* Read/Write packet length. */
> +			if (is_rx) {
> +				hdr.data = readq(fifo->rx_base +
> +						 MLXBF_TMFIFO_RX_DATA);
> +				hdr.data = le64_to_cpu(hdr.data);
> +
> +				/* Skip the length 0 packet (keepalive). */
> +				if (hdr.len == 0)
> +					continue;
> +
> +				/* Check packet type. */
> +				if (hdr.type == VIRTIO_ID_NET) {
> +					struct virtio_net_config *config;
> +
> +					vdev_id = VIRTIO_ID_NET;
> +					hdr_len = sizeof(struct virtio_net_hdr);
> +					config =
> +					    &fifo->vdev[vdev_id]->config.net;
> +					if (ntohs(hdr.len) > config->mtu +
> +
> 	MLXBF_TMFIFO_NET_L2_OVERHEAD)
> +						continue;
> +				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
> +					vdev_id = VIRTIO_ID_CONSOLE;
> +					hdr_len = 0;
> +				} else {
> +					continue;
> +				}
> +
> +				/*
> +				 * Check whether the new packet still belongs
> +				 * to this vring or not. If not, update the
> +				 * pkt_len of the new vring and return.
> +				 */
> +				if (vdev_id != vring->vdev_id) {
> +					struct mlxbf_tmfifo_vdev *dev2 =
> +						fifo->vdev[vdev_id];
> +
> +					if (!dev2)
> +						break;
> +					vring->desc = desc;
> +					vring =
> +					  &dev2-
> >vrings[MLXBF_TMFIFO_VRING_RX];
> +					vring_change = 1;
> +				}
> +				vring->pkt_len = ntohs(hdr.len) + hdr_len;
> +			} else {
> +				vring->pkt_len =
> +					mlxbf_tmfifo_virtio_get_pkt_len(
> +						vdev, desc, vr);
> +
> +				hdr.data = 0;
> +				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
> +					VIRTIO_ID_NET :
> +					VIRTIO_ID_CONSOLE;
> +				hdr.len = htons(vring->pkt_len - hdr_len);
> +				writeq(cpu_to_le64(hdr.data),
> +				       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +			}
> +
> +			vring->cur_len = hdr_len;
> +			vring->rem_len = vring->pkt_len;
> +			fifo->vring[is_rx] = vring;
> +
> +			if (vring_change)
> +				return;
> +			continue;
> +		}
> +
> +		/* Check available space in this desc. */
> +		len = virtio32_to_cpu(vdev, desc->len);
> +		if (len > vring->rem_len)
> +			len = vring->rem_len;
> +
> +		/* Check if the current desc is already done. */
> +		if (vring->cur_len == len)
> +			goto check_done;
> +
> +		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +
> +		/* Read a word from FIFO for Rx. */
> +		if (is_rx) {
> +			data = readq(fifo->rx_base +
> MLXBF_TMFIFO_RX_DATA);
> +			data = le64_to_cpu(data);
> +		}
> +
> +		if (vring->cur_len + sizeof(u64) <= len) {
> +			/* The whole word. */
> +			if (is_rx) {
> +				memcpy(addr + vring->cur_len, &data,
> +				       sizeof(u64));
> +			} else {
> +				memcpy(&data, addr + vring->cur_len,
> +				       sizeof(u64));
> +			}

Why not just the following?
There are also a few places like this one below.

			if (is_rx)
				memcpy(addr + vring->cur_len, &data, sizeof(u64));
			else
				memcpy(&data, addr + vring->cur_len, sizeof(u64));

> +			vring->cur_len += sizeof(u64);
> +		} else {
> +			/* Leftover bytes. */
> +			BUG_ON(vring->cur_len > len);
> +			if (is_rx) {
> +				memcpy(addr + vring->cur_len, &data,
> +				       len - vring->cur_len);
> +			} else {
> +				memcpy(&data, addr + vring->cur_len,
> +				       len - vring->cur_len);
> +			}
> +			vring->cur_len = len;
> +		}
> +
> +		/* Write the word into FIFO for Tx. */
> +		if (!is_rx) {
> +			writeq(cpu_to_le64(data),
> +			       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +		}
> +
> +		num_avail--;
> +
> +check_done:
> +		/* Check whether this desc is full or completed. */
> +		if (vring->cur_len == len) {
> +			vring->cur_len = 0;
> +			vring->rem_len -= len;
> +
> +			/* Get the next desc on the chain. */
> +			if (vring->rem_len > 0 &&
> +			    (virtio16_to_cpu(vdev, desc->flags) &
> +						VRING_DESC_F_NEXT)) {
> +				idx = virtio16_to_cpu(vdev, desc->next);
> +				desc = &vr->desc[idx];
> +				continue;
> +			}
> +
> +			/* Done and release the desc. */
> +			mlxbf_tmfifo_release_pkt(vdev, vring, &desc);
> +			fifo->vring[is_rx] = NULL;
> +
> +			/* Notify upper layer that packet is done. */
> +			spin_lock_irqsave(&fifo->spin_lock, flags);
> +			vring_interrupt(0, vq);
> +			spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +			continue;
> +		}
> +	}
> +
> +	/* Save the current desc. */
> +	vring->desc = desc;
> +}

I suggest splitting mlxbf_tmfifo_virtio_rxtx() into a few small routines.


> +
> +/* The notify function is called when new buffers are posted. */ static
> +bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq) {
> +	struct mlxbf_tmfifo_vring *vring;
> +	struct mlxbf_tmfifo *fifo;
> +	unsigned long flags;
> +
> +	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +	fifo = vring->fifo;
> +
> +	/*
> +	 * Virtio maintains vrings in pairs, even number ring for Rx
> +	 * and odd number ring for Tx.
> +	 */
> +	if (!(vring->id & 1)) {
> +		/* Set the RX HWM bit to start Rx. */
> +		if (!test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo-
> >pend_events))
> +			schedule_work(&fifo->work);
> +	} else {
> +		/*
> +		 * Console could make blocking call with interrupts disabled.
> +		 * In such case, the vring needs to be served right away. For
> +		 * other cases, just set the TX LWM bit to start Tx in the
> +		 * worker handler.
> +		 */
> +		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
> +			spin_lock_irqsave(&fifo->spin_lock, flags);
> +			mlxbf_tmfifo_console_output(
> +				fifo->vdev[VIRTIO_ID_CONSOLE], vq);

			mlxbf_tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE], vq);

> +			spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +			schedule_work(&fifo->work);
> +		} else if (!test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
> +					     &fifo->pend_events))
> +			schedule_work(&fifo->work);

		If {
		} else if {
		}

For consistency.

> +	}
> +
> +	return true;
> +}
> +
> +/* Work handler for Rx and Tx case. */
> +static void mlxbf_tmfifo_work_handler(struct work_struct *work) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	struct mlxbf_tmfifo *fifo;
> +	int i;
> +
> +	fifo = container_of(work, struct mlxbf_tmfifo, work);
> +	if (!fifo->is_ready)
> +		return;
> +
> +	mutex_lock(&fifo->lock);
> +
> +	/* Tx (Send data to the TmFifo). */
> +	if (test_and_clear_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events)
> &&
> +		       fifo->irq_info[MLXBF_TM_TX_LWM_IRQ].irq) {
> +		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {

I suggest defining a local variable vq
and having below:
				mlxbf_tmfifo_virtio_rxtx(vq, false);

> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				mlxbf_tmfifo_virtio_rxtx(
> +				    tm_vdev-
> >vrings[MLXBF_TMFIFO_VRING_TX].vq,
> +				    false);
> +			}
> +		}
> +	}
> +
> +	/* Rx (Receive data from the TmFifo). */
> +	if (test_and_clear_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events)
> &&
> +		       fifo->irq_info[MLXBF_TM_RX_HWM_IRQ].irq) {
> +		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];

Same as above.

> +			if (tm_vdev != NULL) {
> +				mlxbf_tmfifo_virtio_rxtx(
> +				    tm_vdev-
> >vrings[MLXBF_TMFIFO_VRING_RX].vq,
> +				    true);
> +			}
> +		}
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +}
> +
> +/* Get the array of feature bits for this device. */ static u64
> +mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	return tm_vdev->features;
> +}
> +
> +/* Confirm device features to use. */
> +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device
> +*vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	tm_vdev->features = vdev->features;
> +
> +	return 0;
> +}
> +
> +/* Free virtqueues found by find_vqs(). */ static void
> +mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	struct mlxbf_tmfifo_vring *vring;
> +	struct virtqueue *vq;
> +	int i;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +		vring = &tm_vdev->vrings[i];
> +
> +		/* Release the pending packet. */
> +		if (vring->desc != NULL) {
> +			mlxbf_tmfifo_release_pkt(&tm_vdev->vdev, vring,
> +						 &vring->desc);
> +		}
> +
> +		vq = vring->vq;
> +		if (vq) {
> +			vring->vq = NULL;
> +			vring_del_virtqueue(vq);
> +		}
> +	}
> +}
> +
> +/* Create and initialize the virtual queues. */ static int
> +mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
> +					unsigned int nvqs,
> +					struct virtqueue *vqs[],
> +					vq_callback_t *callbacks[],
> +					const char * const names[],
> +					const bool *ctx,
> +					struct irq_affinity *desc)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	struct mlxbf_tmfifo_vring *vring;
> +	int i, ret = -EINVAL, size;

Don't initialize ret with -EINVAL.

> +	struct virtqueue *vq;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
> +		return -EINVAL;
> +
> +	for (i = 0; i < nvqs; ++i) {
> +		if (!names[i])
> +			goto error;
> +		vring = &tm_vdev->vrings[i];
> +
> +		/* zero vring */
> +		size = vring_size(vring->size, vring->align);
> +		memset(vring->va, 0, size);
> +		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
> +					 false, false, vring->va,
> +					 mlxbf_tmfifo_virtio_notify,
> +					 callbacks[i], names[i]);
> +		if (!vq) {
> +			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
> +			ret = -ENOMEM;
> +			goto error;
> +		}
> +
> +		vqs[i] = vq;
> +		vring->vq = vq;
> +		vq->priv = vring;
> +	}
> +
> +	return 0;
> +
> +error:
> +	mlxbf_tmfifo_virtio_del_vqs(vdev);
> +	return ret;
> +}
> +
> +/* Read the status byte. */
> +static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +	return tm_vdev->status;
> +}
> +
> +/* Write the status byte. */
> +static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
> +					   u8 status)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	tm_vdev->status = status;
> +}
> +
> +/* Reset the device. Not much here for now. */ static void
> +mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	tm_vdev->status = 0;
> +}
> +
> +/* Read the value of a configuration field. */ static void
> +mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
> +			      unsigned int offset,
> +			      void *buf,
> +			      unsigned int len)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +

	unsigned int pos = offset + len;

	if (pos > sizeof(tm_vdev->config) || pos < len)


> +	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
> +		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
> +		return;
> +	}
> +
> +	memcpy(buf, (u8 *)&tm_vdev->config + offset, len); }
> +
> +/* Write the value of a configuration field. */ static void
> +mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
> +				 unsigned int offset,
> +				 const void *buf,
> +				 unsigned int len)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {

Same as above.

> +		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
> +		return;
> +	}
> +
> +	memcpy((u8 *)&tm_vdev->config + offset, buf, len); }
> +
> +/* Virtio config operations. */
> +static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
> +	.get_features = mlxbf_tmfifo_virtio_get_features,
> +	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
> +	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
> +	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
> +	.reset = mlxbf_tmfifo_virtio_reset,
> +	.set_status = mlxbf_tmfifo_virtio_set_status,
> +	.get_status = mlxbf_tmfifo_virtio_get_status,
> +	.get = mlxbf_tmfifo_virtio_get,
> +	.set = mlxbf_tmfifo_virtio_set,
> +};
> +
> +/* Create vdev type in a tmfifo. */
> +int mlxbf_tmfifo_create_vdev(struct mlxbf_tmfifo *fifo, int vdev_id,
> +			     u64 features, void *config, u32 size) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	int ret = 0;
> +
> +	mutex_lock(&fifo->lock);
> +
> +	tm_vdev = fifo->vdev[vdev_id];
> +	if (tm_vdev != NULL) {
> +		pr_err("vdev %d already exists\n", vdev_id);
> +		ret = -EEXIST;
> +		goto already_exist;
> +	}
> +
> +	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
> +	if (!tm_vdev) {
> +		ret = -ENOMEM;
> +		goto already_exist;
> +	}
> +
> +	tm_vdev->vdev.id.device = vdev_id;
> +	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> +	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> +	tm_vdev->vdev.dev.release = mlxbf_tmfifo_virtio_dev_release;
> +	tm_vdev->features = features;
> +	if (config)
> +		memcpy(&tm_vdev->config, config, size);
> +	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> +		pr_err("Unable to allocate vring\n");
> +		ret = -ENOMEM;
> +		goto alloc_vring_fail;
> +	}
> +	if (vdev_id == VIRTIO_ID_CONSOLE) {
> +		tm_vdev->tx_buf =
> kmalloc(MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> +					  GFP_KERNEL);
> +	}
> +	fifo->vdev[vdev_id] = tm_vdev;
> +
> +	/* Register the virtio device. */
> +	ret = register_virtio_device(&tm_vdev->vdev);
> +	if (ret) {
> +		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
> +		goto register_fail;
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +	return 0;
> +
> +register_fail:
> +	mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +	fifo->vdev[vdev_id] = NULL;
> +alloc_vring_fail:
> +	kfree(tm_vdev);
> +already_exist:
> +	mutex_unlock(&fifo->lock);
> +	return ret;
> +}
> +
> +/* Delete vdev type from a tmfifo. */
> +int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	mutex_lock(&fifo->lock);
> +
> +	/* Unregister vdev. */
> +	tm_vdev = fifo->vdev[vdev_id];
> +	if (tm_vdev) {
> +		unregister_virtio_device(&tm_vdev->vdev);
> +		mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +		kfree(tm_vdev->tx_buf);
> +		kfree(tm_vdev);
> +		fifo->vdev[vdev_id] = NULL;
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +
> +	return 0;
> +}
> +
> +/* Device remove function. */
> +static int mlxbf_tmfifo_remove(struct platform_device *pdev) {

Locate it after probe.
If you use all the devm_ variants, like Andy noted:
devm_ioremap
devm_ioremap_resource
devm_kzalloc
devm_request_mem_region
you can drop all of the kfree, release_mem_region and iounmap calls.

Then make the code below a separate routine, something like
mlxbf_tmfifo_cleanup(), if you still need it.

> +	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> +	struct resource *rx_res, *tx_res;
> +	int i;
> +
> +	if (fifo) {
> +		mutex_lock(&mlxbf_tmfifo_lock);
> +
> +		fifo->is_ready = false;
> +
> +		/* Stop the timer. */
> +		del_timer_sync(&fifo->timer);
> +
> +		/* Release interrupts. */
> +		mlxbf_tmfifo_free_irqs(fifo);
> +
> +		/* Cancel the pending work. */
> +		cancel_work_sync(&fifo->work);
> +
> +		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
> +			mlxbf_tmfifo_delete_vdev(fifo, i);
> +
> +		/* Release IO resources. */
> +		if (fifo->rx_base)
> +			iounmap(fifo->rx_base);
> +		if (fifo->tx_base)
> +			iounmap(fifo->tx_base);
> +
> +		platform_set_drvdata(pdev, NULL);
> +		kfree(fifo);
> +
> +		mutex_unlock(&mlxbf_tmfifo_lock);
> +	}
> +
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (rx_res)
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (tx_res)
> +		release_mem_region(tx_res->start, resource_size(tx_res));
> +
> +	return 0;
> +}
> +
> +/* Read the configured network MAC address from efi variable. */ static
> +void mlxbf_tmfifo_get_cfg_mac(u8 *mac) {
> +	efi_char16_t name[] = {
> +		'R', 's', 'h', 'i', 'm', 'M', 'a', 'c', 'A', 'd', 'd', 'r', 0 };


Could it be moved out and set like:
static const efi_char16_t mlxbf_tmfifo_efi_name[] = "...";
Could you check if there are some examples in the kernel, please?

> +	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +	efi_status_t status;
> +	unsigned long size;
> +	u8 buf[6];
> +
> +	size = sizeof(buf);
> +	status = efi.get_variable(name, &guid, NULL, &size, buf);
> +	if (status == EFI_SUCCESS && size == sizeof(buf))
> +		memcpy(mac, buf, sizeof(buf));
> +}
> +
> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev) {
> +	struct virtio_net_config net_config;
> +	struct resource *rx_res, *tx_res;
> +	struct mlxbf_tmfifo *fifo;
> +	int i, ret;
> +	u64 ctl;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct mlxbf_tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> +	fifo->timer.function = mlxbf_tmfifo_timer;
> +
> +	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +		fifo->irq_info[i].index = i;
> +		fifo->irq_info[i].fifo = fifo;
> +		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq_info[i].irq,
> +				  mlxbf_tmfifo_irq_handler, 0,
> +				  "tmfifo", &fifo->irq_info[i]);
> +		if (ret) {
> +			pr_err("Unable to request irq\n");
> +			fifo->irq_info[i].irq = 0;
> +			goto err;
> +		}
> +	}
> +
> +	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
> +	if (!fifo->rx_base)
> +		goto err;
> +
> +	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
> +	if (!fifo->tx_base)
> +		goto err;
> +
> +	/* Get Tx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +	fifo->tx_fifo_size =
> +		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK,
> ctl);
> +	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
> +			   fifo->tx_fifo_size / 2);
> +	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
> +			   fifo->tx_fifo_size - 1);
> +	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +
> +	/* Get Rx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +	fifo->rx_fifo_size =
> +		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK,
> ctl);
> +	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
> +	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
> +	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +
> +	mutex_init(&fifo->lock);
> +
> +	/* Create the console vdev. */
> +	ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +	if (ret)
> +		goto err;
> +
> +	/* Create the network vdev. */
> +	memset(&net_config, 0, sizeof(net_config));
> +	net_config.mtu = MLXBF_TMFIFO_NET_MTU;
> +	net_config.status = VIRTIO_NET_S_LINK_UP;
> +	memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
> +	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
> +	ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_NET,
> +		MLXBF_TMFIFO_NET_FEATURES, &net_config,
> sizeof(net_config));
> +	if (ret)
> +		goto err;
> +
> +	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> +
> +	fifo->is_ready = true;
> +
> +	return 0;
> +
> +err:
> +	mlxbf_tmfifo_remove(pdev);
> +early_err:
> +	dev_err(&pdev->dev, "Probe Failed\n");
> +	return ret;
> +}
> +
> +static const struct of_device_id mlxbf_tmfifo_match[] = {
> +	{ .compatible = "mellanox,bf-tmfifo" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mlxbf_tmfifo_match);
> +
> +static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
> +	{ "MLNXBF01", 0 },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
> +
> +static struct platform_driver mlxbf_tmfifo_driver = {
> +	.probe = mlxbf_tmfifo_probe,
> +	.remove = mlxbf_tmfifo_remove,
> +	.driver = {
> +		.name = "bf-tmfifo",
> +		.of_match_table = mlxbf_tmfifo_match,
> +		.acpi_match_table = ACPI_PTR(mlxbf_tmfifo_acpi_match),
> +	},
> +};
> +
> +module_platform_driver(mlxbf_tmfifo_driver);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
> +MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mellanox Technologies");
> --
> 1.8.3.1


^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
@ 2019-01-30  6:24     ` Vadim Pasternak
  0 siblings, 0 replies; 179+ messages in thread
From: Vadim Pasternak @ 2019-01-30  6:24 UTC (permalink / raw)
  To: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart
  Cc: Liming Sun, devicetree, linux-kernel, platform-driver-x86



> -----Original Message-----
> From: Liming Sun <lsun@mellanox.com>
> Sent: Monday, January 28, 2019 7:28 PM
> To: Rob Herring <robh+dt@kernel.org>; Mark Rutland
> <mark.rutland@arm.com>; Arnd Bergmann <arnd@arndb.de>; David Woods
> <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren
> Hart <dvhart@infradead.org>; Vadim Pasternak <vadimp@mellanox.com>
> Cc: Liming Sun <lsun@mellanox.com>; devicetree@vger.kernel.org; linux-
> kernel@vger.kernel.org; platform-driver-x86@vger.kernel.org
> Subject: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox
> BlueField Soc
> 
> This commit adds the TmFifo platform driver for Mellanox BlueField Soc. TmFifo
> is a shared FIFO which enables external host machine to exchange data with the
> SoC via USB or PCIe. The driver is based on virtio framework and has console
> and network access enabled.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/platform/mellanox/Kconfig             |   13 +-
>  drivers/platform/mellanox/Makefile            |    1 +
>  drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   67 ++
>  drivers/platform/mellanox/mlxbf-tmfifo.c      | 1289
> +++++++++++++++++++++++++
>  4 files changed, 1369 insertions(+), 1 deletion(-)  create mode 100644
> drivers/platform/mellanox/mlxbf-tmfifo-regs.h
>  create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c
> 
> diff --git a/drivers/platform/mellanox/Kconfig
> b/drivers/platform/mellanox/Kconfig
> index cd8a908..a565070 100644
> --- a/drivers/platform/mellanox/Kconfig
> +++ b/drivers/platform/mellanox/Kconfig
> @@ -5,7 +5,7 @@
> 
>  menuconfig MELLANOX_PLATFORM
>  	bool "Platform support for Mellanox hardware"
> -	depends on X86 || ARM || COMPILE_TEST
> +	depends on X86 || ARM || ARM64 || COMPILE_TEST
>  	---help---
>  	  Say Y here to get to see options for platform support for
>  	  Mellanox systems. This option alone does not add any kernel code.
> @@ -34,4 +34,15 @@ config MLXREG_IO
>  	  to system resets operation, system reset causes monitoring and some
>  	  kinds of mux selection.
> 
> +config MLXBF_TMFIFO
> +	tristate "Mellanox BlueField SoC TmFifo platform driver"
> +	depends on ARM64

Why do you make it dependent on ARM64?
Shouldn't it work on any host, e.g. x86?

> +	default m

A user who needs it should select this option.
There is no need for default 'm'.

> +	select VIRTIO_CONSOLE
> +	select VIRTIO_NET
> +	help
> +	  Say y here to enable TmFifo support. The TmFifo driver provides
> +          platform driver support for the TmFifo which supports console
> +          and networking based on the virtio framework.
> +
>  endif # MELLANOX_PLATFORM
> diff --git a/drivers/platform/mellanox/Makefile
> b/drivers/platform/mellanox/Makefile
> index 57074d9c..f0c061d 100644
> --- a/drivers/platform/mellanox/Makefile
> +++ b/drivers/platform/mellanox/Makefile
> @@ -5,3 +5,4 @@
>  #
>  obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
>  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> +obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> new file mode 100644
> index 0000000..90c9c2cf
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
> + */
> +
> +#ifndef __MLXBF_TMFIFO_REGS_H__
> +#define __MLXBF_TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +
> +#define MLXBF_TMFIFO_TX_DATA 0x0
> +
> +#define MLXBF_TMFIFO_TX_STS 0x8
> +#define MLXBF_TMFIFO_TX_STS__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_SHIFT 0 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_WIDTH 9 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL 0 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff #define
> +MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
> +
> +#define MLXBF_TMFIFO_TX_CTL 0x10
> +#define MLXBF_TMFIFO_TX_CTL__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_TX_CTL__LWM_SHIFT 0 #define
> MLXBF_TMFIFO_TX_CTL__LWM_WIDTH
> +8 #define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff #define
> +MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff #define
> +MLXBF_TMFIFO_TX_CTL__HWM_SHIFT 8 #define
> MLXBF_TMFIFO_TX_CTL__HWM_WIDTH
> +8 #define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff #define
> +MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#define MLXBF_TMFIFO_RX_DATA 0x0
> +
> +#define MLXBF_TMFIFO_RX_STS 0x8
> +#define MLXBF_TMFIFO_RX_STS__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_SHIFT 0 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_WIDTH 9 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL 0 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff #define
> +MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
> +
> +#define MLXBF_TMFIFO_RX_CTL 0x10
> +#define MLXBF_TMFIFO_RX_CTL__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_RX_CTL__LWM_SHIFT 0 #define
> MLXBF_TMFIFO_RX_CTL__LWM_WIDTH
> +8 #define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff #define
> +MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff #define
> +MLXBF_TMFIFO_RX_CTL__HWM_SHIFT 8 #define
> MLXBF_TMFIFO_RX_CTL__HWM_WIDTH
> +8 #define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff #define
> +MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> new file mode 100644
> index 0000000..c1afe47
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -0,0 +1,1289 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Mellanox BlueField SoC TmFifo driver
> + *
> + * Copyright (C) 2019 Mellanox Technologies  */
> +
> +#include <linux/acpi.h>
> +#include <linux/bitfield.h>
> +#include <linux/cache.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/efi.h>
> +#include <linux/io.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/kernel.h>
> +#include <linux/math64.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/resource.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/version.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <asm/byteorder.h>

Is it a must to include from asm?
Could it be replaced with something like
#include <linux/byteorder/generic.h>

> +
> +#include "mlxbf-tmfifo-regs.h"
> +
> +/* Vring size. */
> +#define MLXBF_TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE		(32 * 1024)
> +
> +/* House-keeping timer interval. */
> +static int mlxbf_tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static DEFINE_MUTEX(mlxbf_tmfifo_lock);
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Struct declaration. */
> +struct mlxbf_tmfifo;
> +
> +/* Structure to maintain the ring state. */ struct mlxbf_tmfifo_vring {
> +	void *va;			/* virtual address */
> +	dma_addr_t dma;			/* dma address */
> +	struct virtqueue *vq;		/* virtqueue pointer */
> +	struct vring_desc *desc;	/* current desc */
> +	struct vring_desc *desc_head;	/* current desc head */
> +	int cur_len;			/* processed len in current desc */
> +	int rem_len;			/* remaining length to be processed */
> +	int size;			/* vring size */
> +	int align;			/* vring alignment */
> +	int id;				/* vring id */
> +	int vdev_id;			/* TMFIFO_VDEV_xxx */
> +	u32 pkt_len;			/* packet total length */
> +	__virtio16 next_avail;		/* next avail desc id */
> +	struct mlxbf_tmfifo *fifo;	/* pointer back to the tmfifo */
> +};
> +
> +/* Interrupt types. */
> +enum {
> +	MLXBF_TM_RX_LWM_IRQ,		/* Rx low water mark irq */
> +	MLXBF_TM_RX_HWM_IRQ,		/* Rx high water mark irq */
> +	MLXBF_TM_TX_LWM_IRQ,		/* Tx low water mark irq */
> +	MLXBF_TM_TX_HWM_IRQ,		/* Tx high water mark irq */
> +	MLXBF_TM_IRQ_CNT
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +	MLXBF_TMFIFO_VRING_RX,		/* Rx ring */
> +	MLXBF_TMFIFO_VRING_TX,		/* Tx ring */
> +	MLXBF_TMFIFO_VRING_NUM
> +};
> +
> +struct mlxbf_tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};
> +
> +struct mlxbf_tmfifo_irq_info {
> +	struct mlxbf_tmfifo *fifo;	/* tmfifo structure */
> +	int irq;			/* interrupt number */
> +	int index;			/* array index */
> +};
> +
> +/* TMFIFO device structure */
> +struct mlxbf_tmfifo {
> +	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /*
> devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;		/* fifo lock */
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info
> */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct mlxbf_tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +	bool is_ready;			/* ready flag */
> +	spinlock_t spin_lock;		/* spin lock */
> +};
> +
> +union mlxbf_tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 mlxbf_tmfifo_net_default_mac[6] = {
> +	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> +
> +/* MTU setting of the virtio-net interface. */
> +#define MLXBF_TMFIFO_NET_MTU		1500
> +
> +/* Maximum L2 header length. */
> +#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
> +
> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU)
> | \
> +					 (1UL << VIRTIO_NET_F_STATUS) | \
> +					 (1UL << VIRTIO_NET_F_MAC))
> +
> +/* Return the consumed Tx buffer space. */ static int
> +mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev) {
> +	return ((vdev->tx_tail >= vdev->tx_head) ?
> +	       (vdev->tx_tail - vdev->tx_head) :
> +	       (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> +vdev->tx_tail)); }

I would suggest splitting the above.

> +
> +/* Return the available Tx buffer space. */ static int
> +mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev) {
> +	return (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8 -

Think about adding some extra define for
"MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8"

> +		mlxbf_tmfifo_vdev_tx_buf_len(vdev));
> +}
> +
> +/* Update Tx buffer pointer after pushing data. */ static void
> +mlxbf_tmfifo_vdev_tx_buf_push(struct mlxbf_tmfifo_vdev *vdev,
> +					  u32 len)
> +{
> +	vdev->tx_tail += len;
> +	if (vdev->tx_tail >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_tail -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE; }
> +
> +/* Update Tx buffer pointer after popping data. */ static void
> +mlxbf_tmfifo_vdev_tx_buf_pop(struct mlxbf_tmfifo_vdev *vdev,
> +					 u32 len)
> +{
> +	vdev->tx_head += len;
> +	if (vdev->tx_head >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_head -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE; }
> +
> +/* Allocate vrings for the fifo. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +				     struct mlxbf_tmfifo_vdev *tm_vdev,
> +				     int vdev_id)
> +{
> +	struct mlxbf_tmfifo_vring *vring;
> +	dma_addr_t dma;
> +	int i, size;
> +	void *va;
> +
> +	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +		vring = &tm_vdev->vrings[i];
> +		vring->fifo = fifo;
> +		vring->size = MLXBF_TMFIFO_VRING_SIZE;
> +		vring->align = SMP_CACHE_BYTES;
> +		vring->id = i;
> +		vring->vdev_id = vdev_id;
> +
> +		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> +		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size,
> &dma,
> +					GFP_KERNEL);
> +		if (!va) {
> +			dev_err(tm_vdev->vdev.dev.parent,
> +				"vring allocation failed\n");
> +			return -EINVAL;
> +		}
> +
> +		vring->va = va;
> +		vring->dma = dma;
> +	}
> +
> +	return 0;
> +}
> +
> +/* Free vrings of the fifo device. */
> +static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo, int
> +vdev_id) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
> +	struct mlxbf_tmfifo_vring *vring;
> +	int i, size;
> +
> +	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +		vring = &tm_vdev->vrings[i];
> +
> +		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> +		if (vring->va) {
> +			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
> +					  vring->va, vring->dma);
> +			vring->va = NULL;
> +			if (vring->vq) {
> +				vring_del_virtqueue(vring->vq);
> +				vring->vq = NULL;
> +			}
> +		}
> +	}
> +}
> +
> +/* Free interrupts of the fifo device. */ static void
> +mlxbf_tmfifo_free_irqs(struct mlxbf_tmfifo *fifo) {
> +	int i, irq;
> +
> +	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +		irq = fifo->irq_info[i].irq;
> +		if (irq) {
> +			fifo->irq_info[i].irq = 0;
> +			disable_irq(irq);
> +			free_irq(irq, (u8 *)fifo + i);
> +		}
> +	}
> +}
> +
> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg) {
> +	struct mlxbf_tmfifo_irq_info *irq_info;
> +
> +	irq_info = (struct mlxbf_tmfifo_irq_info *)arg;
> +
> +	if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> +	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> +		schedule_work(&irq_info->fifo->work);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +/* Nothing to do for now. */
> +static void mlxbf_tmfifo_virtio_dev_release(struct device *dev) { }

If there is nothing to do, there is no reason to have it.

> +
> +/* Get the next packet descriptor from the vring. */ static inline
> +struct vring_desc * mlxbf_tmfifo_virtio_get_next_desc(struct virtqueue
> +*vq) {
> +	struct mlxbf_tmfifo_vring *vring;
> +	unsigned int idx, head;
> +	struct vring *vr;
> +
> +	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!vr || vring->next_avail == vr->avail->idx)
> +		return NULL;
> +
> +	idx = vring->next_avail % vr->num;
> +	head = vr->avail->ring[idx];
> +	BUG_ON(head >= vr->num);
> +	vring->next_avail++;
> +	return &vr->desc[head];
> +}
> +
> +static inline void mlxbf_tmfifo_virtio_release_desc(
> +	struct virtio_device *vdev, struct vring *vr,
> +	struct vring_desc *desc, u32 len)
> +{
> +	unsigned int idx;
> +
> +	idx = vr->used->idx % vr->num;
> +	vr->used->ring[idx].id = desc - vr->desc;
> +	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> +
> +	/* Virtio could poll and check the 'idx' to decide
> +	 * whether the desc is done or not. Add a memory
> +	 * barrier here to make sure the update above completes
> +	 * before updating the idx.
> +	 */
> +	mb();
> +	vr->used->idx++;
> +}
> +
> +/* Get the total length of a descriptor chain. */ static inline u32
> +mlxbf_tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
> +						  struct vring_desc *desc,
> +						  struct vring *vr)
> +{
> +	u32 len = 0, idx;
> +
> +	while (desc) {
> +		len += virtio32_to_cpu(vdev, desc->len);
> +		if (!(virtio16_to_cpu(vdev, desc->flags) &
> VRING_DESC_F_NEXT))
> +			break;
> +		idx = virtio16_to_cpu(vdev, desc->next);
> +		desc = &vr->desc[idx];
> +	}
> +
> +	return len;
> +}
> +
> +static void mlxbf_tmfifo_release_pkt(struct virtio_device *vdev,
> +				     struct mlxbf_tmfifo_vring *vring,
> +				     struct vring_desc **desc)
> +{
> +	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
> +	struct vring_desc *desc_head;
> +	uint32_t pkt_len = 0;
> +
> +	if (!vr)
> +		return;
> +
> +	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
> +		desc_head = vring->desc_head;
> +		pkt_len = vring->pkt_len;
> +	} else {
> +		desc_head = mlxbf_tmfifo_virtio_get_next_desc(vring->vq);
> +		if (desc_head != NULL) {
> +			pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(
> +				vdev, desc_head, vr);
> +		}
> +	}
> +
> +	if (desc_head != NULL)
> +		mlxbf_tmfifo_virtio_release_desc(vdev, vr, desc_head,
> pkt_len);
> +
> +	if (desc != NULL)
> +		*desc = NULL;
> +	vring->pkt_len = 0;
> +}
> +
> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *arg) {
> +	struct mlxbf_tmfifo *fifo;
> +
> +	fifo = container_of(arg, struct mlxbf_tmfifo, timer);
> +
> +	/*
> +	 * Wake up the work handler to poll the Rx FIFO in case interrupt
> +	 * missing or any leftover bytes stuck in the FIFO.
> +	 */
> +	test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
> +
> +	/*
> +	 * Wake up Tx handler in case virtio has queued too many packets
> +	 * and are waiting for buffer return.
> +	 */
> +	test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> +
> +	schedule_work(&fifo->work);
> +
> +	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval); }
> +
> +/* Buffer the console output. */
> +static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
> +					struct virtqueue *vq)
> +{
> +	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
> +	struct vring_desc *head_desc, *desc = NULL;
> +	struct virtio_device *vdev = &cons->vdev;
> +	u32 len, pkt_len, idx;
> +	void *addr;
> +
> +	for (;;) {

It's better to modify it as while (some condition).

> +		head_desc = mlxbf_tmfifo_virtio_get_next_desc(vq);
> +		if (head_desc == NULL)
> +			break;
> +
> +		/* Release the packet if no more space. */
> +		pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(vdev, head_desc,
> vr);
> +		if (pkt_len > mlxbf_tmfifo_vdev_tx_buf_avail(cons)) {
> +			mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
> +							 pkt_len);

Why do you break the line here?

> +			break;
> +		}
> +
> +		desc = head_desc;
> +
> +		while (desc != NULL) {
> +			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +			len = virtio32_to_cpu(vdev, desc->len);
> +
> +			if (len <= MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +			    cons->tx_tail) {

Why do you break the line here? Also, below I see a few strange breaks.

> +				memcpy(cons->tx_buf + cons->tx_tail, addr,
> len);
> +			} else {
> +				u32 seg;
> +
> +				seg = MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +					cons->tx_tail;
> +				memcpy(cons->tx_buf + cons->tx_tail, addr,
> seg);
> +				addr += seg;
> +				memcpy(cons->tx_buf, addr, len - seg);
> +			}
> +			mlxbf_tmfifo_vdev_tx_buf_push(cons, len);
> +
> +			if (!(virtio16_to_cpu(vdev, desc->flags) &
> +			    VRING_DESC_F_NEXT))
> +				break;
> +			idx = virtio16_to_cpu(vdev, desc->next);
> +			desc = &vr->desc[idx];
> +		}
> +
> +		mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
> pkt_len);
> +	}
> +}
> +
> +/* Rx & Tx processing of a virtual queue. */ static void
> +mlxbf_tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx) {
> +	int num_avail = 0, hdr_len, tx_reserve;
> +	struct mlxbf_tmfifo_vring *vring;
> +	struct mlxbf_tmfifo_vdev *cons;
> +	struct virtio_device *vdev;
> +	struct mlxbf_tmfifo *fifo;
> +	struct vring_desc *desc;
> +	unsigned long flags;
> +	struct vring *vr;
> +	u64 sts, data;
> +	u32 len, idx;
> +	void *addr;
> +
> +	if (!vq)
> +		return;
> +
> +	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +	fifo = vring->fifo;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!fifo->vdev[vring->vdev_id])
> +		return;
> +	vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +
> +	/* Don't continue if another vring is running. */
> +	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
> +		return;
> +
> +	/* tx_reserve is used to reserved some room in FIFO for console. */
> +	if (vring->vdev_id == VIRTIO_ID_NET) {
> +		hdr_len = sizeof(struct virtio_net_hdr);
> +		tx_reserve = fifo->tx_fifo_size / 16;

Use some define instead of 16.

> +	} else {
> +		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
> +		hdr_len = 0;
> +		tx_reserve = 1;
> +	}
> +
> +	desc = vring->desc;
> +
> +	while (1) {

I see there are a few drivers in platform which use while (1),
but it looks better to use while (some condition)
and, instead of break, change this condition to false.

> +		/* Get available FIFO space. */
> +		if (num_avail == 0) {
> +			if (is_rx) {
> +				/* Get the number of available words in FIFO.
> */
> +				sts = readq(fifo->rx_base +
> +					    MLXBF_TMFIFO_RX_STS);
> +				num_avail = FIELD_GET(
> +
> 	MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);

				num_avail = FIELD_GET(TMFIFO_RX_STS__COUNT_MASK, sts);

> +
> +				/* Don't continue if nothing in FIFO. */
> +				if (num_avail <= 0)
> +					break;
> +			} else {
> +				/* Get available space in FIFO. */
> +				sts = readq(fifo->tx_base +
> +					    MLXBF_TMFIFO_TX_STS);
> +				num_avail = fifo->tx_fifo_size - tx_reserve -
> +					FIELD_GET(
> +
> 	MLXBF_TMFIFO_TX_STS__COUNT_MASK,
> +						sts);

Same as above.

> +
> +				if (num_avail <= 0)
> +					break;
> +			}
> +		}
> +
> +		/* Console output always comes from the Tx buffer. */
> +		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
> +		    cons != NULL && cons->tx_buf != NULL) {
> +			union mlxbf_tmfifo_msg_hdr hdr;
> +			int size;
> +
> +			size = mlxbf_tmfifo_vdev_tx_buf_len(cons);
> +			if (num_avail < 2 || size == 0)
> +				return;
> +			if (size + sizeof(hdr) > num_avail * sizeof(u64))
> +				size = num_avail * sizeof(u64) - sizeof(hdr);
> +			/* Write header. */
> +			hdr.data = 0;
> +			hdr.type = VIRTIO_ID_CONSOLE;
> +			hdr.len = htons(size);
> +			writeq(cpu_to_le64(hdr.data),
> +			       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +			spin_lock_irqsave(&fifo->spin_lock, flags);
> +			while (size > 0) {
> +				addr = cons->tx_buf + cons->tx_head;
> +
> +				if (cons->tx_head + sizeof(u64) <=
> +				    MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> +					memcpy(&data, addr, sizeof(u64));
> +				} else {
> +					int partial;
> +
> +					partial =
> +
> 	MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +						cons->tx_head;
> +
> +					memcpy(&data, addr, partial);
> +					memcpy((u8 *)&data + partial,
> +					       cons->tx_buf,
> +					       sizeof(u64) - partial);
> +				}
> +				writeq(data,
> +				       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +				if (size >= sizeof(u64)) {
> +					mlxbf_tmfifo_vdev_tx_buf_pop(
> +						cons, sizeof(u64));
> +					size -= sizeof(u64);
> +				} else {
> +					mlxbf_tmfifo_vdev_tx_buf_pop(
> +						cons, size);
> +					size = 0;
> +				}
> +			}
> +			spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +			return;
> +		}
> +
> +		/* Get the desc of next packet. */
> +		if (!desc) {
> +			/* Save the head desc of the chain. */
> +			vring->desc_head =
> +				mlxbf_tmfifo_virtio_get_next_desc(vq);
> +			if (!vring->desc_head) {
> +				vring->desc = NULL;
> +				return;
> +			}
> +			desc = vring->desc_head;
> +			vring->desc = desc;
> +
> +			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
> +				struct virtio_net_hdr *net_hdr;
> +
> +				/* Initialize the packet header. */
> +				net_hdr = (struct virtio_net_hdr *)
> +					phys_to_virt(virtio64_to_cpu(
> +						vdev, desc->addr));
> +				memset(net_hdr, 0, sizeof(*net_hdr));
> +			}
> +		}
> +
> +		/* Beginning of each packet. */
> +		if (vring->pkt_len == 0) {
> +			int vdev_id, vring_change = 0;
> +			union mlxbf_tmfifo_msg_hdr hdr;
> +
> +			num_avail--;
> +
> +			/* Read/Write packet length. */
> +			if (is_rx) {
> +				hdr.data = readq(fifo->rx_base +
> +						 MLXBF_TMFIFO_RX_DATA);
> +				hdr.data = le64_to_cpu(hdr.data);
> +
> +				/* Skip the length 0 packet (keepalive). */
> +				if (hdr.len == 0)
> +					continue;
> +
> +				/* Check packet type. */
> +				if (hdr.type == VIRTIO_ID_NET) {
> +					struct virtio_net_config *config;
> +
> +					vdev_id = VIRTIO_ID_NET;
> +					hdr_len = sizeof(struct virtio_net_hdr);
> +					config =
> +					    &fifo->vdev[vdev_id]->config.net;
> +					if (ntohs(hdr.len) > config->mtu +
> +
> 	MLXBF_TMFIFO_NET_L2_OVERHEAD)
> +						continue;
> +				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
> +					vdev_id = VIRTIO_ID_CONSOLE;
> +					hdr_len = 0;
> +				} else {
> +					continue;
> +				}
> +
> +				/*
> +				 * Check whether the new packet still belongs
> +				 * to this vring or not. If not, update the
> +				 * pkt_len of the new vring and return.
> +				 */
> +				if (vdev_id != vring->vdev_id) {
> +					struct mlxbf_tmfifo_vdev *dev2 =
> +						fifo->vdev[vdev_id];
> +
> +					if (!dev2)
> +						break;
> +					vring->desc = desc;
> +					vring =
> +					  &dev2-
> >vrings[MLXBF_TMFIFO_VRING_RX];
> +					vring_change = 1;
> +				}
> +				vring->pkt_len = ntohs(hdr.len) + hdr_len;
> +			} else {
> +				vring->pkt_len =
> +					mlxbf_tmfifo_virtio_get_pkt_len(
> +						vdev, desc, vr);
> +
> +				hdr.data = 0;
> +				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
> +					VIRTIO_ID_NET :
> +					VIRTIO_ID_CONSOLE;
> +				hdr.len = htons(vring->pkt_len - hdr_len);
> +				writeq(cpu_to_le64(hdr.data),
> +				       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +			}
> +
> +			vring->cur_len = hdr_len;
> +			vring->rem_len = vring->pkt_len;
> +			fifo->vring[is_rx] = vring;
> +
> +			if (vring_change)
> +				return;
> +			continue;
> +		}
> +
> +		/* Check available space in this desc. */
> +		len = virtio32_to_cpu(vdev, desc->len);
> +		if (len > vring->rem_len)
> +			len = vring->rem_len;
> +
> +		/* Check if the current desc is already done. */
> +		if (vring->cur_len == len)
> +			goto check_done;
> +
> +		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +
> +		/* Read a word from FIFO for Rx. */
> +		if (is_rx) {
> +			data = readq(fifo->rx_base +
> MLXBF_TMFIFO_RX_DATA);
> +			data = le64_to_cpu(data);
> +		}
> +
> +		if (vring->cur_len + sizeof(u64) <= len) {
> +			/* The whole word. */
> +			if (is_rx) {
> +				memcpy(addr + vring->cur_len, &data,
> +				       sizeof(u64));
> +			} else {
> +				memcpy(&data, addr + vring->cur_len,
> +				       sizeof(u64));
> +			}

Why not just write it as follows?
The same applies to a few other places like this one below.

			if (is_rx)
				memcpy(addr + vring->cur_len, &data, sizeof(u64));
			else
				memcpy(&data, addr + vring->cur_len, sizeof(u64));

> +			vring->cur_len += sizeof(u64);
> +		} else {
> +			/* Leftover bytes. */
> +			BUG_ON(vring->cur_len > len);
> +			if (is_rx) {
> +				memcpy(addr + vring->cur_len, &data,
> +				       len - vring->cur_len);
> +			} else {
> +				memcpy(&data, addr + vring->cur_len,
> +				       len - vring->cur_len);
> +			}
> +			vring->cur_len = len;
> +		}
> +
> +		/* Write the word into FIFO for Tx. */
> +		if (!is_rx) {
> +			writeq(cpu_to_le64(data),
> +			       fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +		}
> +
> +		num_avail--;
> +
> +check_done:
> +		/* Check whether this desc is full or completed. */
> +		if (vring->cur_len == len) {
> +			vring->cur_len = 0;
> +			vring->rem_len -= len;
> +
> +			/* Get the next desc on the chain. */
> +			if (vring->rem_len > 0 &&
> +			    (virtio16_to_cpu(vdev, desc->flags) &
> +						VRING_DESC_F_NEXT)) {
> +				idx = virtio16_to_cpu(vdev, desc->next);
> +				desc = &vr->desc[idx];
> +				continue;
> +			}
> +
> +			/* Done and release the desc. */
> +			mlxbf_tmfifo_release_pkt(vdev, vring, &desc);
> +			fifo->vring[is_rx] = NULL;
> +
> +			/* Notify upper layer that packet is done. */
> +			spin_lock_irqsave(&fifo->spin_lock, flags);
> +			vring_interrupt(0, vq);
> +			spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +			continue;
> +		}
> +	}
> +
> +	/* Save the current desc. */
> +	vring->desc = desc;
> +}

I suggest splitting mlxbf_tmfifo_virtio_rxtx() into a few small routines.


> +
> +/* The notify function is called when new buffers are posted. */ static
> +bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq) {
> +	struct mlxbf_tmfifo_vring *vring;
> +	struct mlxbf_tmfifo *fifo;
> +	unsigned long flags;
> +
> +	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +	fifo = vring->fifo;
> +
> +	/*
> +	 * Virtio maintains vrings in pairs, even number ring for Rx
> +	 * and odd number ring for Tx.
> +	 */
> +	if (!(vring->id & 1)) {
> +		/* Set the RX HWM bit to start Rx. */
> +		if (!test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo-
> >pend_events))
> +			schedule_work(&fifo->work);
> +	} else {
> +		/*
> +		 * Console could make blocking call with interrupts disabled.
> +		 * In such case, the vring needs to be served right away. For
> +		 * other cases, just set the TX LWM bit to start Tx in the
> +		 * worker handler.
> +		 */
> +		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
> +			spin_lock_irqsave(&fifo->spin_lock, flags);
> +			mlxbf_tmfifo_console_output(
> +				fifo->vdev[VIRTIO_ID_CONSOLE], vq);

			mlxbf_tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE], vq);

> +			spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +			schedule_work(&fifo->work);
> +		} else if (!test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
> +					     &fifo->pend_events))
> +			schedule_work(&fifo->work);

		If {
		} else if {
		}

For consistency.

> +	}
> +
> +	return true;
> +}
> +
> +/* Work handler for Rx and Tx case. */
> +static void mlxbf_tmfifo_work_handler(struct work_struct *work) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	struct mlxbf_tmfifo *fifo;
> +	int i;
> +
> +	fifo = container_of(work, struct mlxbf_tmfifo, work);
> +	if (!fifo->is_ready)
> +		return;
> +
> +	mutex_lock(&fifo->lock);
> +
> +	/* Tx (Send data to the TmFifo). */
> +	if (test_and_clear_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events)
> &&
> +		       fifo->irq_info[MLXBF_TM_TX_LWM_IRQ].irq) {
> +		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {

I suggest defining a local variable vq.
And then have the following:
				mlxbf_tmfifo_virtio_rxtx(vq, false);

> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				mlxbf_tmfifo_virtio_rxtx(
> +				    tm_vdev-
> >vrings[MLXBF_TMFIFO_VRING_TX].vq,
> +				    false);
> +			}
> +		}
> +	}
> +
> +	/* Rx (Receive data from the TmFifo). */
> +	if (test_and_clear_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events)
> &&
> +		       fifo->irq_info[MLXBF_TM_RX_HWM_IRQ].irq) {
> +		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];

Same as above.

> +			if (tm_vdev != NULL) {
> +				mlxbf_tmfifo_virtio_rxtx(
> +				    tm_vdev-
> >vrings[MLXBF_TMFIFO_VRING_RX].vq,
> +				    true);
> +			}
> +		}
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +}
> +
> +/* Get the array of feature bits for this device. */ static u64
> +mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	return tm_vdev->features;
> +}
> +
> +/* Confirm device features to use. */
> +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device
> +*vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	tm_vdev->features = vdev->features;
> +
> +	return 0;
> +}
> +
> +/* Free virtqueues found by find_vqs(). */ static void
> +mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	struct mlxbf_tmfifo_vring *vring;
> +	struct virtqueue *vq;
> +	int i;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +		vring = &tm_vdev->vrings[i];
> +
> +		/* Release the pending packet. */
> +		if (vring->desc != NULL) {
> +			mlxbf_tmfifo_release_pkt(&tm_vdev->vdev, vring,
> +						 &vring->desc);
> +		}
> +
> +		vq = vring->vq;
> +		if (vq) {
> +			vring->vq = NULL;
> +			vring_del_virtqueue(vq);
> +		}
> +	}
> +}
> +
> +/* Create and initialize the virtual queues. */ static int
> +mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
> +					unsigned int nvqs,
> +					struct virtqueue *vqs[],
> +					vq_callback_t *callbacks[],
> +					const char * const names[],
> +					const bool *ctx,
> +					struct irq_affinity *desc)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	struct mlxbf_tmfifo_vring *vring;
> +	int i, ret = -EINVAL, size;

Don't initialize ret with -EINVAL.

> +	struct virtqueue *vq;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
> +		return -EINVAL;
> +
> +	for (i = 0; i < nvqs; ++i) {
> +		if (!names[i])
> +			goto error;
> +		vring = &tm_vdev->vrings[i];
> +
> +		/* zero vring */
> +		size = vring_size(vring->size, vring->align);
> +		memset(vring->va, 0, size);
> +		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
> +					 false, false, vring->va,
> +					 mlxbf_tmfifo_virtio_notify,
> +					 callbacks[i], names[i]);
> +		if (!vq) {
> +			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
> +			ret = -ENOMEM;
> +			goto error;
> +		}
> +
> +		vqs[i] = vq;
> +		vring->vq = vq;
> +		vq->priv = vring;
> +	}
> +
> +	return 0;
> +
> +error:
> +	mlxbf_tmfifo_virtio_del_vqs(vdev);
> +	return ret;
> +}
> +
> +/* Read the status byte. */
> +static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +	return tm_vdev->status;
> +}
> +
> +/* Write the status byte. */
> +static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
> +					   u8 status)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	tm_vdev->status = status;
> +}
> +
> +/* Reset the device. Not much here for now. */ static void
> +mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +	tm_vdev->status = 0;
> +}
> +
> +/* Read the value of a configuration field. */ static void
> +mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
> +			      unsigned int offset,
> +			      void *buf,
> +			      unsigned int len)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +

	unsigned int pos = offset + len;

	if (pos > sizeof(tm_vdev->config) || pos < len)


> +	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
> +		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
> +		return;
> +	}
> +
> +	memcpy(buf, (u8 *)&tm_vdev->config + offset, len); }
> +
> +/* Write the value of a configuration field. */ static void
> +mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
> +				 unsigned int offset,
> +				 const void *buf,
> +				 unsigned int len)
> +{
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {

Same as above.

> +		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
> +		return;
> +	}
> +
> +	memcpy((u8 *)&tm_vdev->config + offset, buf, len); }
> +
> +/* Virtio config operations. */
> +static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
> +	.get_features = mlxbf_tmfifo_virtio_get_features,
> +	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
> +	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
> +	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
> +	.reset = mlxbf_tmfifo_virtio_reset,
> +	.set_status = mlxbf_tmfifo_virtio_set_status,
> +	.get_status = mlxbf_tmfifo_virtio_get_status,
> +	.get = mlxbf_tmfifo_virtio_get,
> +	.set = mlxbf_tmfifo_virtio_set,
> +};
> +
> +/* Create vdev type in a tmfifo. */
> +int mlxbf_tmfifo_create_vdev(struct mlxbf_tmfifo *fifo, int vdev_id,
> +			     u64 features, void *config, u32 size) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +	int ret = 0;
> +
> +	mutex_lock(&fifo->lock);
> +
> +	tm_vdev = fifo->vdev[vdev_id];
> +	if (tm_vdev != NULL) {
> +		pr_err("vdev %d already exists\n", vdev_id);
> +		ret = -EEXIST;
> +		goto already_exist;
> +	}
> +
> +	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
> +	if (!tm_vdev) {
> +		ret = -ENOMEM;
> +		goto already_exist;
> +	}
> +
> +	tm_vdev->vdev.id.device = vdev_id;
> +	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> +	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> +	tm_vdev->vdev.dev.release = mlxbf_tmfifo_virtio_dev_release;
> +	tm_vdev->features = features;
> +	if (config)
> +		memcpy(&tm_vdev->config, config, size);
> +	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> +		pr_err("Unable to allocate vring\n");
> +		ret = -ENOMEM;
> +		goto alloc_vring_fail;
> +	}
> +	if (vdev_id == VIRTIO_ID_CONSOLE) {
> +		tm_vdev->tx_buf =
> kmalloc(MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> +					  GFP_KERNEL);
> +	}
> +	fifo->vdev[vdev_id] = tm_vdev;
> +
> +	/* Register the virtio device. */
> +	ret = register_virtio_device(&tm_vdev->vdev);
> +	if (ret) {
> +		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
> +		goto register_fail;
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +	return 0;
> +
> +register_fail:
> +	mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +	fifo->vdev[vdev_id] = NULL;
> +alloc_vring_fail:
> +	kfree(tm_vdev);
> +already_exist:
> +	mutex_unlock(&fifo->lock);
> +	return ret;
> +}
> +
> +/* Delete vdev type from a tmfifo. */
> +int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id) {
> +	struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +	mutex_lock(&fifo->lock);
> +
> +	/* Unregister vdev. */
> +	tm_vdev = fifo->vdev[vdev_id];
> +	if (tm_vdev) {
> +		unregister_virtio_device(&tm_vdev->vdev);
> +		mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +		kfree(tm_vdev->tx_buf);
> +		kfree(tm_vdev);
> +		fifo->vdev[vdev_id] = NULL;
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +
> +	return 0;
> +}
> +
> +/* Device remove function. */
> +static int mlxbf_tmfifo_remove(struct platform_device *pdev) {

Locate it after probe.
If you'll use all devm_, like Andy noted:
devm_ioremap
devm_ioremap_resource
devm_kzalloc
devm_request_mem_region
you can drop all kfree, release_mem_region, iounmap

And make the code below a separate routine, something like
mlxbf_tmfifo_cleanup(), if you still need it.

> +	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> +	struct resource *rx_res, *tx_res;
> +	int i;
> +
> +	if (fifo) {
> +		mutex_lock(&mlxbf_tmfifo_lock);
> +
> +		fifo->is_ready = false;
> +
> +		/* Stop the timer. */
> +		del_timer_sync(&fifo->timer);
> +
> +		/* Release interrupts. */
> +		mlxbf_tmfifo_free_irqs(fifo);
> +
> +		/* Cancel the pending work. */
> +		cancel_work_sync(&fifo->work);
> +
> +		for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
> +			mlxbf_tmfifo_delete_vdev(fifo, i);
> +
> +		/* Release IO resources. */
> +		if (fifo->rx_base)
> +			iounmap(fifo->rx_base);
> +		if (fifo->tx_base)
> +			iounmap(fifo->tx_base);
> +
> +		platform_set_drvdata(pdev, NULL);
> +		kfree(fifo);
> +
> +		mutex_unlock(&mlxbf_tmfifo_lock);
> +	}
> +
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	if (rx_res)
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (tx_res)
> +		release_mem_region(tx_res->start, resource_size(tx_res));
> +
> +	return 0;
> +}
> +
> +/* Read the configured network MAC address from efi variable. */ static
> +void mlxbf_tmfifo_get_cfg_mac(u8 *mac) {
> +	efi_char16_t name[] = {
> +		'R', 's', 'h', 'i', 'm', 'M', 'a', 'c', 'A', 'd', 'd', 'r', 0 };


Could it be moved out and set like:
static const efi_char16_t mlxbf_tmfifo_efi_name[] = "...";
Could you check if there are some examples in the kernel, please?

> +	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +	efi_status_t status;
> +	unsigned long size;
> +	u8 buf[6];
> +
> +	size = sizeof(buf);
> +	status = efi.get_variable(name, &guid, NULL, &size, buf);
> +	if (status == EFI_SUCCESS && size == sizeof(buf))
> +		memcpy(mac, buf, sizeof(buf));
> +}
> +
> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev) {
> +	struct virtio_net_config net_config;
> +	struct resource *rx_res, *tx_res;
> +	struct mlxbf_tmfifo *fifo;
> +	int i, ret;
> +	u64 ctl;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct mlxbf_tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> +	fifo->timer.function = mlxbf_tmfifo_timer;
> +
> +	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +		fifo->irq_info[i].index = i;
> +		fifo->irq_info[i].fifo = fifo;
> +		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq_info[i].irq,
> +				  mlxbf_tmfifo_irq_handler, 0,
> +				  "tmfifo", &fifo->irq_info[i]);
> +		if (ret) {
> +			pr_err("Unable to request irq\n");
> +			fifo->irq_info[i].irq = 0;
> +			goto err;
> +		}
> +	}
> +
> +	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
> +	if (!fifo->rx_base)
> +		goto err;
> +
> +	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
> +	if (!fifo->tx_base)
> +		goto err;
> +
> +	/* Get Tx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +	fifo->tx_fifo_size =
> +		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK,
> ctl);
> +	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
> +			   fifo->tx_fifo_size / 2);
> +	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
> +			   fifo->tx_fifo_size - 1);
> +	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +
> +	/* Get Rx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +	fifo->rx_fifo_size =
> +		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK,
> ctl);
> +	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
> +	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
> +		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
> +	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +
> +	mutex_init(&fifo->lock);
> +
> +	/* Create the console vdev. */
> +	ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +	if (ret)
> +		goto err;
> +
> +	/* Create the network vdev. */
> +	memset(&net_config, 0, sizeof(net_config));
> +	net_config.mtu = MLXBF_TMFIFO_NET_MTU;
> +	net_config.status = VIRTIO_NET_S_LINK_UP;
> +	memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
> +	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
> +	ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_NET,
> +		MLXBF_TMFIFO_NET_FEATURES, &net_config,
> sizeof(net_config));
> +	if (ret)
> +		goto err;
> +
> +	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> +
> +	fifo->is_ready = true;
> +
> +	return 0;
> +
> +err:
> +	mlxbf_tmfifo_remove(pdev);
> +early_err:
> +	dev_err(&pdev->dev, "Probe Failed\n");
> +	return ret;
> +}
> +
> +static const struct of_device_id mlxbf_tmfifo_match[] = {
> +	{ .compatible = "mellanox,bf-tmfifo" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, mlxbf_tmfifo_match);
> +
> +static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
> +	{ "MLNXBF01", 0 },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
> +
> +static struct platform_driver mlxbf_tmfifo_driver = {
> +	.probe = mlxbf_tmfifo_probe,
> +	.remove = mlxbf_tmfifo_remove,
> +	.driver = {
> +		.name = "bf-tmfifo",
> +		.of_match_table = mlxbf_tmfifo_match,
> +		.acpi_match_table = ACPI_PTR(mlxbf_tmfifo_acpi_match),
> +	},
> +};
> +
> +module_platform_driver(mlxbf_tmfifo_driver);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
> +MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mellanox Technologies");
> --
> 1.8.3.1

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (44 preceding siblings ...)
  (?)
@ 2019-02-13 13:27 ` Liming Sun
  2019-02-13 18:11   ` Andy Shevchenko
  -1 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-02-13 13:27 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Signed-off-by: Liming Sun <lsun@mellanox.com>

---

v9: Fix coding styles. Adjust code to use devm_xxx() APIs.
    Removed the DT binding documentation since only ACPI is
    supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox with target-size
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   10 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   67 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1361 +++++++++++++++++++++++++
 4 files changed, 1438 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..6feceb1 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,12 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64 && ACPI && VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+          platform driver support for the TmFifo which supports console
+          and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..f0c061d 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -5,3 +5,4 @@
 #
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..90c9c2cf
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define MLXBF_TMFIFO_TX_DATA 0x0
+
+#define MLXBF_TMFIFO_TX_STS 0x8
+#define MLXBF_TMFIFO_TX_STS__LENGTH 0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT 0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH 9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define MLXBF_TMFIFO_TX_CTL 0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH 0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT 0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH 8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT 8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH 8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define MLXBF_TMFIFO_RX_DATA 0x0
+
+#define MLXBF_TMFIFO_RX_STS 0x8
+#define MLXBF_TMFIFO_RX_STS__LENGTH 0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT 0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH 9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define MLXBF_TMFIFO_RX_CTL 0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH 0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT 0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH 8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT 8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH 8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..ce55fca
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1361 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/byteorder/generic.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE		(32 * 1024)
+
+/* Console Tx buffer size with some reservation. */
+#define MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE	\
+	(MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8)
+
+/* House-keeping timer interval. */
+static int mlxbf_tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static DEFINE_MUTEX(mlxbf_tmfifo_lock);
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+/* Struct declaration. */
+struct mlxbf_tmfifo;
+
+/* Structure to maintain the ring state. */
+struct mlxbf_tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	struct mlxbf_tmfifo *fifo;	/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,		/* Rx low water mark irq */
+	MLXBF_TM_RX_HWM_IRQ,		/* Rx high water mark irq */
+	MLXBF_TM_TX_LWM_IRQ,		/* Tx low water mark irq */
+	MLXBF_TM_TX_HWM_IRQ,		/* Tx high water mark irq */
+	MLXBF_TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	MLXBF_TMFIFO_VRING_RX,		/* Rx ring */
+	MLXBF_TMFIFO_VRING_TX,		/* Tx ring */
+	MLXBF_TMFIFO_VRING_NUM
+};
+
+/* Structure for the virtual device. */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* Structure of the interrupt information. */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;	/* tmfifo structure */
+	int irq;			/* interrupt number */
+	int index;			/* array index */
+};
+
+/* Structure of the TmFifo information. */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /* devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;		/* fifo lock */
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct mlxbf_tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+/* Use a union structure for 64-bit little/big endian. */
+union mlxbf_tmfifo_data_64bit {
+	u64 data;
+	__le64 data_le;
+};
+
+/* Message header used to demux data in the TmFifo. */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	union mlxbf_tmfifo_data_64bit u;	/* 64-bit data */
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[6] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* MTU setting of the virtio-net interface. */
+#define MLXBF_TMFIFO_NET_MTU		1500
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+					 (1UL << VIRTIO_NET_F_STATUS) | \
+					 (1UL << VIRTIO_NET_F_MAC))
+
+/* Function declarations. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev);
+
+/* Console output are buffered and can be accessed with the functions below. */
+
+/* Return the consumed Tx buffer space. */
+static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(vdev->tx_tail - vdev->tx_head) :
+		(MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
+		 vdev->tx_tail));
+}
+
+/* Return the available Tx buffer space. */
+static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev)
+{
+	return (MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE -
+		mlxbf_tmfifo_vdev_tx_buf_len(vdev));
+}
+
+/* Update Rx/Tx buffer index pointer. */
+static void mlxbf_tmfifo_vdev_tx_buf_index_inc(u32 *index, u32 len)
+{
+	*index += len;
+	if (*index >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
+		*index -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/*
+ * Allocate the DMA-coherent ring memory for all vrings of a vdev.
+ *
+ * On failure every ring allocated so far is released again, so the caller
+ * does not have to unwind partial allocations (it cannot use
+ * mlxbf_tmfifo_free_vrings() yet because fifo->vdev[vdev_id] is not set).
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev,
+				     int vdev_id)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	dma_addr_t dma;
+	int i, j, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"dma_alloc_coherent failed\n");
+			/* Unwind the rings already allocated for this vdev. */
+			for (j = 0; j < i; j++) {
+				vring = &tm_vdev->vrings[j];
+				size = PAGE_ALIGN(vring_size(vring->size,
+							     vring->align));
+				dma_free_coherent(tm_vdev->vdev.dev.parent,
+						  size, vring->va, vring->dma);
+				vring->va = NULL;
+			}
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the fifo device.
+ *
+ * The virtqueue is deleted before the ring memory it was built on is
+ * returned, so the virtqueue never points at freed memory. The vq is also
+ * handled independently of 'va' so nothing is skipped if only one of the
+ * two was set up.
+ */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Delete the virtqueue first; it references vring->va. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+		if (vring->va) {
+			size = PAGE_ALIGN(vring_size(vring->size,
+						     vring->align));
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/* Disable interrupts of the fifo device. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int irq, i;
+
+	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
+		irq = fifo->irq_info[i].irq;
+		if (!irq)
+			continue;
+		/* Clear the stored number before disabling the line. */
+		fifo->irq_info[i].irq = 0;
+		disable_irq(irq);
+	}
+}
+
+/* Interrupt handler: record the pending event and kick the worker. */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	if (irq_info->index >= MLXBF_TM_IRQ_CNT)
+		return IRQ_HANDLED;
+
+	/* Schedule the worker only if the event was not already pending. */
+	if (!test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when
+ * the driver side has not posted any new buffers (next_avail has caught
+ * up with the avail index).
+ */
+static struct vring_desc *mlxbf_tmfifo_get_next_desc(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	unsigned int idx, head;
+	struct vring *vr;
+
+	vr = (struct vring *)virtqueue_get_vring(vq);
+	if (!vr)
+		return NULL;
+	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
+	/* No new buffers posted yet. */
+	if (vring->next_avail == virtio16_to_cpu(vq->vdev, vr->avail->idx))
+		return NULL;
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vq->vdev, vr->avail->ring[idx]);
+	/* A head index past the ring size indicates a corrupted ring. */
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/*
+ * Release a virtio descriptor chain back to the used ring.
+ *
+ * 'desc' is the head of the chain and 'len' the number of bytes written
+ * into (Rx) or consumed from (Tx) its buffers.
+ */
+static void mlxbf_tmfifo_release_desc(struct virtio_device *vdev,
+				      struct vring *vr, struct vring_desc *desc,
+				      u32 len)
+{
+	u16 idx, vr_idx;
+
+	/* Fill the next used-ring slot with the chain's head id and length. */
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Get the total length of the descriptor chain. */
+static u32 mlxbf_tmfifo_get_pkt_len(struct virtio_device *vdev,
+				    struct vring_desc *desc, struct vring *vr)
+{
+	u32 total = 0;
+	u16 flags;
+
+	/* Accumulate segment lengths until the chain ends. */
+	while (desc) {
+		total += virtio32_to_cpu(vdev, desc->len);
+		flags = virtio16_to_cpu(vdev, desc->flags);
+		if (!(flags & VRING_DESC_F_NEXT))
+			break;
+		desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)];
+	}
+
+	return total;
+}
+
+/*
+ * Release the current packet of 'vring' back to the used ring.
+ *
+ * If '*desc' points at a packet already being processed, the cached head
+ * and packet length are used; otherwise the next available chain is
+ * consumed and released with its full chain length. '*desc' and the
+ * cached packet length are cleared on return.
+ */
+static void mlxbf_tmfifo_release_pkt(struct virtio_device *vdev,
+				     struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc **desc)
+{
+	struct vring_desc *desc_head;
+	struct vring *vr;
+	u32 len = 0;
+
+	vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	if (!vr)
+		return;
+
+	if (desc && *desc && vring->desc_head) {
+		/* Packet in progress: use the cached head and length. */
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring->vq);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vdev, desc_head, vr);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vdev, vr, desc_head, len);
+
+	if (desc)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/*
+ * Get and initialize the next packet.
+ *
+ * Caches the chain head in vring->desc_head so the whole packet can be
+ * released later, and returns the head descriptor (or NULL if none).
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct virtio_device *vdev,
+			  struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring->vq);
+
+	/*
+	 * Initialize the packet header for received network packet.
+	 * NOTE(review): assumes the first Rx descriptor buffer is at least
+	 * sizeof(struct virtio_net_hdr) bytes -- confirm with the virtio-net
+	 * Rx buffer posting path.
+	 */
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+		struct virtio_net_hdr *net_hdr;
+
+		net_hdr = (struct virtio_net_hdr *)
+			phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		memset(net_hdr, 0, sizeof(*net_hdr));
+	}
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/* House-keeping timer: periodically poke the worker in both directions. */
+static void mlxbf_tmfifo_timer(struct timer_list *t)
+{
+	struct mlxbf_tmfifo *fifo = container_of(t, struct mlxbf_tmfifo,
+						 timer);
+
+	/*
+	 * Mark both directions pending so the worker polls the FIFO:
+	 * Rx in case an interrupt was missed or leftover bytes are stuck
+	 * in the FIFO, Tx in case virtio has queued too many packets and
+	 * is waiting for buffer return.
+	 */
+	test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+	test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+	schedule_work(&fifo->work);
+
+	/* Re-arm for the next poll interval. */
+	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
+}
+
+/*
+ * Copy one console packet into the output buffer.
+ *
+ * The caller has already verified that the whole packet fits into the
+ * free space of the circular buffer (see mlxbf_tmfifo_console_output()).
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct virtio_device *vdev,
+					    struct vring *vr,
+					    struct vring_desc *desc)
+{
+	u32 len, idx, seg;
+	void *addr;
+
+	/* Walk the descriptor chain, copying each segment into tx_buf. */
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		if (len <= MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+			memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+		} else {
+			/* The copy wraps around the end of the buffer. */
+			seg = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+			memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf, addr, len - seg);
+		}
+		mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_tail, len);
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * Called with fifo->spin_lock held (see mlxbf_tmfifo_virtio_notify()).
+ * Packets that do not fit into the remaining buffer space are released
+ * unsent (dropped) rather than blocking the caller.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct virtqueue *vq)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	struct vring_desc *desc;
+	u32 len;
+
+	desc = mlxbf_tmfifo_get_next_desc(vq);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vdev, desc, vr);
+		if (len > mlxbf_tmfifo_vdev_tx_buf_avail(cons)) {
+			mlxbf_tmfifo_release_desc(vdev, vr, desc, len);
+			break;
+		}
+
+		/* Output this packet. */
+		mlxbf_tmfifo_console_output_one(cons, vdev, vr, desc);
+
+		/* Release the head desc. */
+		mlxbf_tmfifo_release_desc(vdev, vr, desc, len);
+
+		/* Get next packet. */
+		desc = mlxbf_tmfifo_get_next_desc(vq);
+	}
+}
+
+/* Number of words currently readable from the Rx FIFO. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 status = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, status);
+}
+
+/* Number of words that can still be pushed into the Tx FIFO. */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vring *vring)
+{
+	int reserve;
+	u64 status;
+
+	/*
+	 * Network traffic leaves a fraction of the FIFO free so console
+	 * messages are not starved; the console itself reserves one word.
+	 */
+	if (vring->vdev_id == VIRTIO_ID_NET)
+		reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+	else
+		reserve = 1;
+
+	status = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	return fifo->tx_fifo_size - reserve -
+		FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, status);
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ *
+ * 'avail' is the number of free words in the Tx FIFO as reported by the
+ * caller; nothing is sent unless at least MLXBF_TMFIFO_DATA_MIN_WORDS are
+ * available.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, partial;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf)
+		return;
+
+	/* Return if no data to send. */
+	size = mlxbf_tmfifo_vdev_tx_buf_len(cons);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/*
+	 * Write the header. The in-place cpu_to_le64() store neutralizes
+	 * the little-endian store performed by writeq(), so the header
+	 * bytes reach the FIFO in memory order on both LE and BE hosts.
+	 */
+	hdr.u.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	hdr.u.data_le = cpu_to_le64(hdr.u.data);
+	writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf + cons->tx_head;
+
+		if (cons->tx_head + sizeof(u64) <=
+		    MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			/* The word wraps around the end of the buffer. */
+			partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
+			memcpy(&data, addr, partial);
+			memcpy((u8 *)&data + partial, cons->tx_buf,
+			       sizeof(u64) - partial);
+		}
+		/*
+		 * Apply the same endian neutralization as the header above
+		 * and as mlxbf_tmfifo_rxtx_word(), so data words are emitted
+		 * in buffer byte order on BE hosts too.
+		 */
+		data = (__force u64)cpu_to_le64(data);
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		if (size >= sizeof(u64)) {
+			mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
+							   sizeof(u64));
+			size -= sizeof(u64);
+		} else {
+			mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
+							   size);
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/*
+ * Rx/Tx one word in the descriptor buffer.
+ *
+ * 'len' is the number of payload bytes in this descriptor; when fewer
+ * than 8 bytes remain, only the leftover bytes are copied.
+ */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo *fifo,
+				   struct virtio_device *vdev,
+				   struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int *avail, int len)
+{
+	union mlxbf_tmfifo_data_64bit u;
+	void *addr;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx) {
+		u.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+		u.data = le64_to_cpu(u.data_le);
+	} else {
+		/*
+		 * Zero-initialize the word for Tx so a partial trailing
+		 * copy below cannot leak uninitialized stack bytes into
+		 * the FIFO.
+		 */
+		u.data = 0;
+	}
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &u.data, sizeof(u64));
+		else
+			memcpy(&u.data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/* Leftover bytes. */
+		if (WARN_ON(vring->cur_len > len))
+			return;
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &u.data,
+			       len - vring->cur_len);
+		else
+			memcpy(&u.data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx) {
+		u.data_le = cpu_to_le64(u.data);
+		writeq(u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	(*avail)--;
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo *fifo,
+				     struct virtio_device *vdev,
+				     struct mlxbf_tmfifo_vring *vring,
+				     struct vring *vr,
+				     struct vring_desc *desc,
+				     bool is_rx, int *avail,
+				     int *vring_change)
+{
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id;
+	int hdr_len;
+
+	/* Update the available data in the FIFO for the header. */
+	(*avail)--;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.u.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+		hdr.u.data = le64_to_cpu(hdr.u.data_le);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			/*
+			 * Drop the packet if the network vdev is not
+			 * present; dereferencing its config below would
+			 * otherwise be a NULL pointer access.
+			 */
+			if (!fifo->vdev[vdev_id])
+				return;
+			config = &fifo->vdev[vdev_id]->config.net;
+			/* Drop oversized packets. */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *dev2 = fifo->vdev[vdev_id];
+
+			if (!dev2)
+				return;
+			vring->desc = desc;
+			vring = &dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = 1;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vdev, desc, vr);
+		hdr.u.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		hdr.u.data_le = cpu_to_le64(hdr.u.data);
+		writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Track progress of the (possibly new) current packet. */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo *fifo,
+				       struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	struct virtio_device *vdev;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct vring *vr;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	desc = vring->desc;
+	if (!desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vdev, vring, is_rx);
+		if (!desc)
+			return false;
+	}
+
+	vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		int vring_change = 0;
+
+		mlxbf_tmfifo_rxtx_header(fifo, vdev, vring, vr, desc, is_rx,
+					 avail, &vring_change);
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto done;
+	}
+
+	/* Get the length of this desc, clamped to the remaining packet. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len != len)
+		mlxbf_tmfifo_rxtx_word(fifo, vdev, vring, desc, is_rx, avail,
+				       len);
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto done;
+		}
+
+		/* Done and release the desc. */
+		mlxbf_tmfifo_release_pkt(vdev, vring, &desc);
+		fifo->vring[is_rx] = NULL;
+
+		/*
+		 * Notify upper layer that packet is done. The spin lock
+		 * serializes against the console notify path, which also
+		 * touches the vring under the same lock.
+		 */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * Loops over descriptors of the given virtqueue until the FIFO runs out
+ * of data/space or the queue has no more work.
+ */
+static void mlxbf_tmfifo_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct mlxbf_tmfifo *fifo;
+	int avail = 0;
+	bool more;
+
+	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+
+	/*
+	 * Return if another vring is running, i.e. a packet of that vring
+	 * is still partially in the FIFO; this vring is served again once
+	 * that packet completes.
+	 */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(vring->vdev_id != VIRTIO_ID_NET &&
+		    vring->vdev_id != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, vring);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Try to handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(fifo, vring, is_rx, &avail);
+	} while (more);
+}
+
+/* Serve one direction (Rx or Tx) across all vdevs if its event is pending. */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct virtqueue *vq;
+	int i;
+
+	/* Nothing to do unless the event is pending and the irq is active. */
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (!tm_vdev)
+			continue;
+		vq = tm_vdev->vrings[queue_id].vq;
+		if (vq)
+			mlxbf_tmfifo_rxtx(vq, is_rx);
+	}
+}
+
+/* Deferred worker: drain Tx then fill Rx while holding the fifo mutex. */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo = container_of(work, struct mlxbf_tmfifo,
+						 work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Send pending data to the TmFifo first, then receive from it. */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Rx work is always deferred to the worker; console Tx may be served
+ * inline under the spin lock because console writes can block with
+ * interrupts disabled.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct mlxbf_tmfifo_vdev *vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	vring = (struct mlxbf_tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(vdev, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					     &fifo->pend_events)) {
+			schedule_work(&fifo->work);
+		}
+	}
+
+	return true;
+}
+
+/* Report the feature bits advertised for this device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	return tm_vdev->features;
+}
+
+/* Record the feature bits negotiated by the virtio core. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Tear down all virtqueues of a vdev, releasing any in-flight packet. */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Return any half-processed packet to the used ring first. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pkt(&tm_vdev->vdev, vring,
+						 &vring->desc);
+
+		vq = vring->vq;
+		if (!vq)
+			continue;
+		vring->vq = NULL;
+		vring_del_virtqueue(vq);
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * The ring memory itself was already DMA-allocated per vring in
+ * mlxbf_tmfifo_alloc_vrings(); here each ring is zeroed and wrapped in a
+ * virtqueue. On any failure all queues created so far are deleted.
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link queue and vring both ways for later lookups. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device: just clear the status byte for now. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev =
+		container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	/*
+	 * Validate offset and len separately so a huge 'offset' cannot wrap
+	 * 'offset + len' around and defeat the bounds check.
+	 */
+	if (offset > sizeof(tm_vdev->config) ||
+	    len > sizeof(tm_vdev->config) - offset) {
+		dev_err(vdev->dev.parent, "virtio_get out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
+
+	/*
+	 * Validate offset and len separately so a huge 'offset' cannot wrap
+	 * 'offset + len' around and defeat the bounds check.
+	 */
+	if (offset > sizeof(tm_vdev->config) ||
+	    len > sizeof(tm_vdev->config) - offset) {
+		dev_err(vdev->dev.parent, "virtio_set out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations (dispatch table used by the virtio core). */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * Allocates the vdev, its vrings and (for the console) the output buffer,
+ * then registers the virtio device. Returns 0 on success or a negative
+ * errno; all partially-created resources are released on failure.
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Publish the vdev so the error path below can free its vrings. */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* The console keeps an output buffer for blocking writes. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = devm_kmalloc(dev,
+					       MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
+					       GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (!tm_vdev) {
+		mutex_unlock(&fifo->lock);
+		return 0;
+	}
+
+	/* Unregister first so virtio stops using the vrings. */
+	unregister_virtio_device(&tm_vdev->vdev);
+	mlxbf_tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Fetch the configured network MAC address from the EFI variable. */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	u8 buf[6];
+	unsigned long size = sizeof(buf);
+	efi_status_t status;
+
+	status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
+				  buf);
+	/* Keep the caller's default MAC unless a full address was read. */
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Set TmFifo thresholds which are used to trigger interrupts. */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	/* LWM at half depth, HWM one below full. */
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	/* LWM 0 / HWM 1: interrupt as soon as a single word arrives. */
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Common teardown used by both the probe error path and device removal:
+ * stop the house-keeping timer, the interrupts and the worker, then
+ * delete all vdevs.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	mutex_lock(&mlxbf_tmfifo_lock);
+	/* Mark not ready so the worker bails out early. */
+	fifo->is_ready = false;
+
+	/* Stop the timer. */
+	del_timer_sync(&fifo->timer);
+
+	/* Disable interrupts. */
+	mlxbf_tmfifo_disable_irqs(fifo);
+
+	/* Cancel the pending work. */
+	cancel_work_sync(&fifo->work);
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+
+	mutex_unlock(&mlxbf_tmfifo_lock);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx FIFO registers, wires up the interrupts, programs the
+ * FIFO watermarks, and creates the console and network virtio devices.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct resource *rx_res, *tx_res;
+	struct mlxbf_tmfifo *fifo;
+	int i, ret;
+
+	/* Get the resource of the Rx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!rx_res)
+		return -ENODEV;
+
+	/* Get the resource of the Tx FIFO. */
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!tx_res)
+		return -ENODEV;
+
+	if (!devm_request_mem_region(&pdev->dev, rx_res->start,
+				     resource_size(rx_res), "bf-tmfifo"))
+		return -EBUSY;
+
+	if (!devm_request_mem_region(&pdev->dev, tx_res->start,
+				     resource_size(tx_res), "bf-tmfifo"))
+		return -EBUSY;
+
+	fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+
+		/* platform_get_irq() returns a negative errno on failure. */
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			return ret;
+		fifo->irq_info[i].irq = ret;
+
+		ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
+				       mlxbf_tmfifo_irq_handler, 0,
+				       "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+	}
+
+	fifo->rx_base = devm_ioremap(&pdev->dev, rx_res->start,
+				     resource_size(rx_res));
+	if (!fifo->rx_base)
+		return -ENOMEM;
+
+	fifo->tx_base = devm_ioremap(&pdev->dev, tx_res->start,
+				     resource_size(tx_res));
+	if (!fifo->tx_base)
+		return -ENOMEM;
+
+	mutex_init(&fifo->lock);
+
+	/* Program the FIFO watermarks that drive the interrupts. */
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
+				       NULL, 0);
+	if (ret)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = MLXBF_TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_NET,
+				       MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				       sizeof(net_config));
+	if (ret)
+		goto fail;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return ret;
+}
+
+/* Platform driver remove callback. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	if (fifo)
+		mlxbf_tmfifo_cleanup(fifo);
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+/* ACPI ID used to bind this driver on the BlueField SoC. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-29 22:06   ` Andy Shevchenko
@ 2019-02-13 13:34     ` Liming Sun
  2019-02-13 16:33     ` Liming Sun
  1 sibling, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-02-13 13:34 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart, Vadim Pasternak, devicetree,
	Linux Kernel Mailing List, Platform Driver

[-- Attachment #1: Type: text/plain, Size: 1344 bytes --]

Thanks Andy!


V9 has been posted with the devm_ changes and coding style fixes according to the received comments in the other patch. It also has changes for comments from Vadim during Mellanox internal review.


Regards,

Liming


________________________________
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Sent: Tuesday, January 29, 2019 5:06 PM
To: Liming Sun
Cc: Rob Herring; Mark Rutland; Arnd Bergmann; David Woods; Andy Shevchenko; Darren Hart; Vadim Pasternak; devicetree; Linux Kernel Mailing List; Platform Driver
Subject: Re: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc

On Mon, Jan 28, 2019 at 7:28 PM Liming Sun <lsun@mellanox.com> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>


Please, go through this series taking into account review I just did
for your another patch.

On top of that, see recent (for few years I think) drivers what modern
APIs they are using, e.g. devm_.

--
With Best Regards,
Andy Shevchenko

[-- Attachment #2: Type: text/html, Size: 2690 bytes --]

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-30  6:24     ` Vadim Pasternak
  (?)
@ 2019-02-13 13:42     ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-02-13 13:42 UTC (permalink / raw)
  To: Vadim Pasternak, Rob Herring, Mark Rutland, Arnd Bergmann,
	David Woods, Andy Shevchenko, Darren Hart
  Cc: devicetree, linux-kernel, platform-driver-x86

[-- Attachment #1: Type: text/plain, Size: 58936 bytes --]

Thanks Vadim! The v9 has been posted to solve these comments. (Also thanks a lot for the comments in the internal review.)


Regards,

Liming

________________________________
From: Vadim Pasternak
Sent: Wednesday, January 30, 2019 1:24 AM
To: Liming Sun; Rob Herring; Mark Rutland; Arnd Bergmann; David Woods; Andy Shevchenko; Darren Hart
Cc: Liming Sun; devicetree@vger.kernel.org; linux-kernel@vger.kernel.org; platform-driver-x86@vger.kernel.org
Subject: RE: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc



> -----Original Message-----
> From: Liming Sun <lsun@mellanox.com>
> Sent: Monday, January 28, 2019 7:28 PM
> To: Rob Herring <robh+dt@kernel.org>; Mark Rutland
> <mark.rutland@arm.com>; Arnd Bergmann <arnd@arndb.de>; David Woods
> <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren
> Hart <dvhart@infradead.org>; Vadim Pasternak <vadimp@mellanox.com>
> Cc: Liming Sun <lsun@mellanox.com>; devicetree@vger.kernel.org; linux-
> kernel@vger.kernel.org; platform-driver-x86@vger.kernel.org
> Subject: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox
> BlueField Soc
>
> This commit adds the TmFifo platform driver for Mellanox BlueField Soc. TmFifo
> is a shared FIFO which enables external host machine to exchange data with the
> SoC via USB or PCIe. The driver is based on virtio framework and has console
> and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/platform/mellanox/Kconfig             |   13 +-
>  drivers/platform/mellanox/Makefile            |    1 +
>  drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   67 ++
>  drivers/platform/mellanox/mlxbf-tmfifo.c      | 1289
> +++++++++++++++++++++++++
>  4 files changed, 1369 insertions(+), 1 deletion(-)  create mode 100644
> drivers/platform/mellanox/mlxbf-tmfifo-regs.h
>  create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c
>
> diff --git a/drivers/platform/mellanox/Kconfig
> b/drivers/platform/mellanox/Kconfig
> index cd8a908..a565070 100644
> --- a/drivers/platform/mellanox/Kconfig
> +++ b/drivers/platform/mellanox/Kconfig
> @@ -5,7 +5,7 @@
>
>  menuconfig MELLANOX_PLATFORM
>        bool "Platform support for Mellanox hardware"
> -     depends on X86 || ARM || COMPILE_TEST
> +     depends on X86 || ARM || ARM64 || COMPILE_TEST
>        ---help---
>          Say Y here to get to see options for platform support for
>          Mellanox systems. This option alone does not add any kernel code.
> @@ -34,4 +34,15 @@ config MLXREG_IO
>          to system resets operation, system reset causes monitoring and some
>          kinds of mux selection.
>
> +config MLXBF_TMFIFO
> +     tristate "Mellanox BlueField SoC TmFifo platform driver"
> +     depends on ARM64

Why do you make it dependent on ARM64?
Shouldn't it work on any host, e.g. x86?

> +     default m

A user who needs it should select this option.
No need for default 'm'.

> +     select VIRTIO_CONSOLE
> +     select VIRTIO_NET
> +     help
> +       Say y here to enable TmFifo support. The TmFifo driver provides
> +          platform driver support for the TmFifo which supports console
> +          and networking based on the virtio framework.
> +
>  endif # MELLANOX_PLATFORM
> diff --git a/drivers/platform/mellanox/Makefile
> b/drivers/platform/mellanox/Makefile
> index 57074d9c..f0c061d 100644
> --- a/drivers/platform/mellanox/Makefile
> +++ b/drivers/platform/mellanox/Makefile
> @@ -5,3 +5,4 @@
>  #
>  obj-$(CONFIG_MLXREG_HOTPLUG) += mlxreg-hotplug.o
>  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> +obj-$(CONFIG_MLXBF_TMFIFO)   += mlxbf-tmfifo.o
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> new file mode 100644
> index 0000000..90c9c2cf
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> @@ -0,0 +1,67 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
> + */
> +
> +#ifndef __MLXBF_TMFIFO_REGS_H__
> +#define __MLXBF_TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +
> +#define MLXBF_TMFIFO_TX_DATA 0x0
> +
> +#define MLXBF_TMFIFO_TX_STS 0x8
> +#define MLXBF_TMFIFO_TX_STS__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_SHIFT 0 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_WIDTH 9 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL 0 #define
> +MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff #define
> +MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
> +
> +#define MLXBF_TMFIFO_TX_CTL 0x10
> +#define MLXBF_TMFIFO_TX_CTL__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_TX_CTL__LWM_SHIFT 0 #define
> MLXBF_TMFIFO_TX_CTL__LWM_WIDTH
> +8 #define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff #define
> +MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff #define
> +MLXBF_TMFIFO_TX_CTL__HWM_SHIFT 8 #define
> MLXBF_TMFIFO_TX_CTL__HWM_WIDTH
> +8 #define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff #define
> +MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256 #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff #define
> +MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#define MLXBF_TMFIFO_RX_DATA 0x0
> +
> +#define MLXBF_TMFIFO_RX_STS 0x8
> +#define MLXBF_TMFIFO_RX_STS__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_SHIFT 0 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_WIDTH 9 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL 0 #define
> +MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff #define
> +MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
> +
> +#define MLXBF_TMFIFO_RX_CTL 0x10
> +#define MLXBF_TMFIFO_RX_CTL__LENGTH 0x0001 #define
> +MLXBF_TMFIFO_RX_CTL__LWM_SHIFT 0 #define
> MLXBF_TMFIFO_RX_CTL__LWM_WIDTH
> +8 #define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff #define
> +MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff #define
> +MLXBF_TMFIFO_RX_CTL__HWM_SHIFT 8 #define
> MLXBF_TMFIFO_RX_CTL__HWM_WIDTH
> +8 #define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL 128 #define
> +MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff #define
> +MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256 #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff #define
> +MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c
> b/drivers/platform/mellanox/mlxbf-tmfifo.c
> new file mode 100644
> index 0000000..c1afe47
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -0,0 +1,1289 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Mellanox BlueField SoC TmFifo driver
> + *
> + * Copyright (C) 2019 Mellanox Technologies  */
> +
> +#include <linux/acpi.h>
> +#include <linux/bitfield.h>
> +#include <linux/cache.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/efi.h>
> +#include <linux/io.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/kernel.h>
> +#include <linux/math64.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/resource.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/version.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +#include <asm/byteorder.h>

Is it a must to include this from asm?
Could it be replaced with something like
#include <linux/byteorder/generic.h>

> +
> +#include "mlxbf-tmfifo-regs.h"
> +
> +/* Vring size. */
> +#define MLXBF_TMFIFO_VRING_SIZE                      1024
> +
> +/* Console Tx buffer size. */
> +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE                (32 * 1024)
> +
> +/* House-keeping timer interval. */
> +static int mlxbf_tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static DEFINE_MUTEX(mlxbf_tmfifo_lock);
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define MLXBF_TMFIFO_VDEV_MAX                (VIRTIO_ID_CONSOLE + 1)
> +
> +/* Struct declaration. */
> +struct mlxbf_tmfifo;
> +
> +/* Structure to maintain the ring state. */ struct mlxbf_tmfifo_vring {
> +     void *va;                       /* virtual address */
> +     dma_addr_t dma;                 /* dma address */
> +     struct virtqueue *vq;           /* virtqueue pointer */
> +     struct vring_desc *desc;        /* current desc */
> +     struct vring_desc *desc_head;   /* current desc head */
> +     int cur_len;                    /* processed len in current desc */
> +     int rem_len;                    /* remaining length to be processed */
> +     int size;                       /* vring size */
> +     int align;                      /* vring alignment */
> +     int id;                         /* vring id */
> +     int vdev_id;                    /* TMFIFO_VDEV_xxx */
> +     u32 pkt_len;                    /* packet total length */
> +     __virtio16 next_avail;          /* next avail desc id */
> +     struct mlxbf_tmfifo *fifo;      /* pointer back to the tmfifo */
> +};
> +
> +/* Interrupt types. */
> +enum {
> +     MLXBF_TM_RX_LWM_IRQ,            /* Rx low water mark irq */
> +     MLXBF_TM_RX_HWM_IRQ,            /* Rx high water mark irq */
> +     MLXBF_TM_TX_LWM_IRQ,            /* Tx low water mark irq */
> +     MLXBF_TM_TX_HWM_IRQ,            /* Tx high water mark irq */
> +     MLXBF_TM_IRQ_CNT
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +     MLXBF_TMFIFO_VRING_RX,          /* Rx ring */
> +     MLXBF_TMFIFO_VRING_TX,          /* Tx ring */
> +     MLXBF_TMFIFO_VRING_NUM
> +};
> +
> +struct mlxbf_tmfifo_vdev {
> +     struct virtio_device vdev;      /* virtual device */
> +     u8 status;
> +     u64 features;
> +     union {                         /* virtio config space */
> +             struct virtio_console_config cons;
> +             struct virtio_net_config net;
> +     } config;
> +     struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> +     u8 *tx_buf;                     /* tx buffer */
> +     u32 tx_head;                    /* tx buffer head */
> +     u32 tx_tail;                    /* tx buffer tail */
> +};
> +
> +struct mlxbf_tmfifo_irq_info {
> +     struct mlxbf_tmfifo *fifo;      /* tmfifo structure */
> +     int irq;                        /* interrupt number */
> +     int index;                      /* array index */
> +};
> +
> +/* TMFIFO device structure */
> +struct mlxbf_tmfifo {
> +     struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /*
> devices */
> +     struct platform_device *pdev;   /* platform device */
> +     struct mutex lock;              /* fifo lock */
> +     void __iomem *rx_base;          /* mapped register base */
> +     void __iomem *tx_base;          /* mapped register base */
> +     int tx_fifo_size;               /* number of entries of the Tx FIFO */
> +     int rx_fifo_size;               /* number of entries of the Rx FIFO */
> +     unsigned long pend_events;      /* pending bits for deferred process */
> +     struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info
> */
> +     struct work_struct work;        /* work struct for deferred process */
> +     struct timer_list timer;        /* keepalive timer */
> +     struct mlxbf_tmfifo_vring *vring[2];    /* current Tx/Rx ring */
> +     bool is_ready;                  /* ready flag */
> +     spinlock_t spin_lock;           /* spin lock */
> +};
> +
> +union mlxbf_tmfifo_msg_hdr {
> +     struct {
> +             u8 type;                /* message type */
> +             __be16 len;             /* payload length */
> +             u8 unused[5];           /* reserved, set to 0 */
> +     } __packed;
> +     u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 mlxbf_tmfifo_net_default_mac[6] = {
> +     0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> +
> +/* MTU setting of the virtio-net interface. */
> +#define MLXBF_TMFIFO_NET_MTU         1500
> +
> +/* Maximum L2 header length. */
> +#define MLXBF_TMFIFO_NET_L2_OVERHEAD 36
> +
> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES    ((1UL << VIRTIO_NET_F_MTU)
> | \
> +                                      (1UL << VIRTIO_NET_F_STATUS) | \
> +                                      (1UL << VIRTIO_NET_F_MAC))
> +
> +/* Return the consumed Tx buffer space. */ static int
> +mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev) {
> +     return ((vdev->tx_tail >= vdev->tx_head) ?
> +            (vdev->tx_tail - vdev->tx_head) :
> +            (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> +vdev->tx_tail)); }

I would suggest splitting the above.

> +
> +/* Return the available Tx buffer space. */ static int
> +mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev) {
> +     return (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8 -

Think about adding some extra define for
"MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8"

> +             mlxbf_tmfifo_vdev_tx_buf_len(vdev));
> +}
> +
> +/* Update Tx buffer pointer after pushing data. */ static void
> +mlxbf_tmfifo_vdev_tx_buf_push(struct mlxbf_tmfifo_vdev *vdev,
> +                                       u32 len)
> +{
> +     vdev->tx_tail += len;
> +     if (vdev->tx_tail >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +             vdev->tx_tail -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE; }
> +
> +/* Update Tx buffer pointer after popping data. */ static void
> +mlxbf_tmfifo_vdev_tx_buf_pop(struct mlxbf_tmfifo_vdev *vdev,
> +                                      u32 len)
> +{
> +     vdev->tx_head += len;
> +     if (vdev->tx_head >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +             vdev->tx_head -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE; }
> +
> +/* Allocate vrings for the fifo. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +                                  struct mlxbf_tmfifo_vdev *tm_vdev,
> +                                  int vdev_id)
> +{
> +     struct mlxbf_tmfifo_vring *vring;
> +     dma_addr_t dma;
> +     int i, size;
> +     void *va;
> +
> +     for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +             vring = &tm_vdev->vrings[i];
> +             vring->fifo = fifo;
> +             vring->size = MLXBF_TMFIFO_VRING_SIZE;
> +             vring->align = SMP_CACHE_BYTES;
> +             vring->id = i;
> +             vring->vdev_id = vdev_id;
> +
> +             size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> +             va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size,
> &dma,
> +                                     GFP_KERNEL);
> +             if (!va) {
> +                     dev_err(tm_vdev->vdev.dev.parent,
> +                             "vring allocation failed\n");
> +                     return -EINVAL;
> +             }
> +
> +             vring->va = va;
> +             vring->dma = dma;
> +     }
> +
> +     return 0;
> +}
> +
> +/* Free vrings of the fifo device. */
> +static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo, int
> +vdev_id) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
> +     struct mlxbf_tmfifo_vring *vring;
> +     int i, size;
> +
> +     for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +             vring = &tm_vdev->vrings[i];
> +
> +             size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> +             if (vring->va) {
> +                     dma_free_coherent(tm_vdev->vdev.dev.parent, size,
> +                                       vring->va, vring->dma);
> +                     vring->va = NULL;
> +                     if (vring->vq) {
> +                             vring_del_virtqueue(vring->vq);
> +                             vring->vq = NULL;
> +                     }
> +             }
> +     }
> +}
> +
> +/* Free interrupts of the fifo device. */ static void
> +mlxbf_tmfifo_free_irqs(struct mlxbf_tmfifo *fifo) {
> +     int i, irq;
> +
> +     for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +             irq = fifo->irq_info[i].irq;
> +             if (irq) {
> +                     fifo->irq_info[i].irq = 0;
> +                     disable_irq(irq);
> +                     free_irq(irq, (u8 *)fifo + i);
> +             }
> +     }
> +}
> +
> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg) {
> +     struct mlxbf_tmfifo_irq_info *irq_info;
> +
> +     irq_info = (struct mlxbf_tmfifo_irq_info *)arg;
> +
> +     if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> +         !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> +             schedule_work(&irq_info->fifo->work);
> +
> +     return IRQ_HANDLED;
> +}
> +
> +/* Nothing to do for now. */
> +static void mlxbf_tmfifo_virtio_dev_release(struct device *dev) { }

If there is nothing to do - no reason to have it.

> +
> +/* Get the next packet descriptor from the vring. */ static inline
> +struct vring_desc * mlxbf_tmfifo_virtio_get_next_desc(struct virtqueue
> +*vq) {
> +     struct mlxbf_tmfifo_vring *vring;
> +     unsigned int idx, head;
> +     struct vring *vr;
> +
> +     vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +     vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +     if (!vr || vring->next_avail == vr->avail->idx)
> +             return NULL;
> +
> +     idx = vring->next_avail % vr->num;
> +     head = vr->avail->ring[idx];
> +     BUG_ON(head >= vr->num);
> +     vring->next_avail++;
> +     return &vr->desc[head];
> +}
> +
> +static inline void mlxbf_tmfifo_virtio_release_desc(
> +     struct virtio_device *vdev, struct vring *vr,
> +     struct vring_desc *desc, u32 len)
> +{
> +     unsigned int idx;
> +
> +     idx = vr->used->idx % vr->num;
> +     vr->used->ring[idx].id = desc - vr->desc;
> +     vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> +
> +     /* Virtio could poll and check the 'idx' to decide
> +      * whether the desc is done or not. Add a memory
> +      * barrier here to make sure the update above completes
> +      * before updating the idx.
> +      */
> +     mb();
> +     vr->used->idx++;
> +}
> +
> +/* Get the total length of a descriptor chain. */ static inline u32
> +mlxbf_tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
> +                                               struct vring_desc *desc,
> +                                               struct vring *vr)
> +{
> +     u32 len = 0, idx;
> +
> +     while (desc) {
> +             len += virtio32_to_cpu(vdev, desc->len);
> +             if (!(virtio16_to_cpu(vdev, desc->flags) &
> VRING_DESC_F_NEXT))
> +                     break;
> +             idx = virtio16_to_cpu(vdev, desc->next);
> +             desc = &vr->desc[idx];
> +     }
> +
> +     return len;
> +}
> +
> +static void mlxbf_tmfifo_release_pkt(struct virtio_device *vdev,
> +                                  struct mlxbf_tmfifo_vring *vring,
> +                                  struct vring_desc **desc)
> +{
> +     struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
> +     struct vring_desc *desc_head;
> +     uint32_t pkt_len = 0;
> +
> +     if (!vr)
> +             return;
> +
> +     if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
> +             desc_head = vring->desc_head;
> +             pkt_len = vring->pkt_len;
> +     } else {
> +             desc_head = mlxbf_tmfifo_virtio_get_next_desc(vring->vq);
> +             if (desc_head != NULL) {
> +                     pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(
> +                             vdev, desc_head, vr);
> +             }
> +     }
> +
> +     if (desc_head != NULL)
> +             mlxbf_tmfifo_virtio_release_desc(vdev, vr, desc_head,
> pkt_len);
> +
> +     if (desc != NULL)
> +             *desc = NULL;
> +     vring->pkt_len = 0;
> +}
> +
> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *arg) {
> +     struct mlxbf_tmfifo *fifo;
> +
> +     fifo = container_of(arg, struct mlxbf_tmfifo, timer);
> +
> +     /*
> +      * Wake up the work handler to poll the Rx FIFO in case interrupt
> +      * missing or any leftover bytes stuck in the FIFO.
> +      */
> +     test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
> +
> +     /*
> +      * Wake up Tx handler in case virtio has queued too many packets
> +      * and are waiting for buffer return.
> +      */
> +     test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> +
> +     schedule_work(&fifo->work);
> +
> +     mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval); }
> +
> +/* Buffer the console output. */
> +static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
> +                                     struct virtqueue *vq)
> +{
> +     struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
> +     struct vring_desc *head_desc, *desc = NULL;
> +     struct virtio_device *vdev = &cons->vdev;
> +     u32 len, pkt_len, idx;
> +     void *addr;
> +
> +     for (;;) {

It's better to rewrite it as while (some condition).

> +             head_desc = mlxbf_tmfifo_virtio_get_next_desc(vq);
> +             if (head_desc == NULL)
> +                     break;
> +
> +             /* Release the packet if no more space. */
> +             pkt_len = mlxbf_tmfifo_virtio_get_pkt_len(vdev, head_desc,
> vr);
> +             if (pkt_len > mlxbf_tmfifo_vdev_tx_buf_avail(cons)) {
> +                     mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
> +                                                      pkt_len);

Why do you break line here?

> +                     break;
> +             }
> +
> +             desc = head_desc;
> +
> +             while (desc != NULL) {
> +                     addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +                     len = virtio32_to_cpu(vdev, desc->len);
> +
> +                     if (len <= MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +                         cons->tx_tail) {

Why do you break line here? Also below I see few strange breaks.

> +                             memcpy(cons->tx_buf + cons->tx_tail, addr,
> len);
> +                     } else {
> +                             u32 seg;
> +
> +                             seg = MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +                                     cons->tx_tail;
> +                             memcpy(cons->tx_buf + cons->tx_tail, addr,
> seg);
> +                             addr += seg;
> +                             memcpy(cons->tx_buf, addr, len - seg);
> +                     }
> +                     mlxbf_tmfifo_vdev_tx_buf_push(cons, len);
> +
> +                     if (!(virtio16_to_cpu(vdev, desc->flags) &
> +                         VRING_DESC_F_NEXT))
> +                             break;
> +                     idx = virtio16_to_cpu(vdev, desc->next);
> +                     desc = &vr->desc[idx];
> +             }
> +
> +             mlxbf_tmfifo_virtio_release_desc(vdev, vr, head_desc,
> pkt_len);
> +     }
> +}
> +
> +/* Rx & Tx processing of a virtual queue. */ static void
> +mlxbf_tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx) {
> +     int num_avail = 0, hdr_len, tx_reserve;
> +     struct mlxbf_tmfifo_vring *vring;
> +     struct mlxbf_tmfifo_vdev *cons;
> +     struct virtio_device *vdev;
> +     struct mlxbf_tmfifo *fifo;
> +     struct vring_desc *desc;
> +     unsigned long flags;
> +     struct vring *vr;
> +     u64 sts, data;
> +     u32 len, idx;
> +     void *addr;
> +
> +     if (!vq)
> +             return;
> +
> +     vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +     fifo = vring->fifo;
> +     vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +     if (!fifo->vdev[vring->vdev_id])
> +             return;
> +     vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +     cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +
> +     /* Don't continue if another vring is running. */
> +     if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
> +             return;
> +
> +     /* tx_reserve is used to reserved some room in FIFO for console. */
> +     if (vring->vdev_id == VIRTIO_ID_NET) {
> +             hdr_len = sizeof(struct virtio_net_hdr);
> +             tx_reserve = fifo->tx_fifo_size / 16;

Use some define instead of 16.

> +     } else {
> +             BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
> +             hdr_len = 0;
> +             tx_reserve = 1;
> +     }
> +
> +     desc = vring->desc;
> +
> +     while (1) {

I see there are a few drivers in platform which use while (1),
but it looks better to use while (some condition),
and instead of break, change that condition to false.

> +             /* Get available FIFO space. */
> +             if (num_avail == 0) {
> +                     if (is_rx) {
> +                             /* Get the number of available words in FIFO.
> */
> +                             sts = readq(fifo->rx_base +
> +                                         MLXBF_TMFIFO_RX_STS);
> +                             num_avail = FIELD_GET(
> +
>        MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);

                                num_avail = FIELD_GET(TMFIFO_RX_STS__COUNT_MASK, sts);

> +
> +                             /* Don't continue if nothing in FIFO. */
> +                             if (num_avail <= 0)
> +                                     break;
> +                     } else {
> +                             /* Get available space in FIFO. */
> +                             sts = readq(fifo->tx_base +
> +                                         MLXBF_TMFIFO_TX_STS);
> +                             num_avail = fifo->tx_fifo_size - tx_reserve -
> +                                     FIELD_GET(
> +
>        MLXBF_TMFIFO_TX_STS__COUNT_MASK,
> +                                             sts);

Same as above.

> +
> +                             if (num_avail <= 0)
> +                                     break;
> +                     }
> +             }
> +
> +             /* Console output always comes from the Tx buffer. */
> +             if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
> +                 cons != NULL && cons->tx_buf != NULL) {
> +                     union mlxbf_tmfifo_msg_hdr hdr;
> +                     int size;
> +
> +                     size = mlxbf_tmfifo_vdev_tx_buf_len(cons);
> +                     if (num_avail < 2 || size == 0)
> +                             return;
> +                     if (size + sizeof(hdr) > num_avail * sizeof(u64))
> +                             size = num_avail * sizeof(u64) - sizeof(hdr);
> +                     /* Write header. */
> +                     hdr.data = 0;
> +                     hdr.type = VIRTIO_ID_CONSOLE;
> +                     hdr.len = htons(size);
> +                     writeq(cpu_to_le64(hdr.data),
> +                            fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +                     spin_lock_irqsave(&fifo->spin_lock, flags);
> +                     while (size > 0) {
> +                             addr = cons->tx_buf + cons->tx_head;
> +
> +                             if (cons->tx_head + sizeof(u64) <=
> +                                 MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> +                                     memcpy(&data, addr, sizeof(u64));
> +                             } else {
> +                                     int partial;
> +
> +                                     partial =
> +
>        MLXBF_TMFIFO_CONS_TX_BUF_SIZE -
> +                                             cons->tx_head;
> +
> +                                     memcpy(&data, addr, partial);
> +                                     memcpy((u8 *)&data + partial,
> +                                            cons->tx_buf,
> +                                            sizeof(u64) - partial);
> +                             }
> +                             writeq(data,
> +                                    fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +                             if (size >= sizeof(u64)) {
> +                                     mlxbf_tmfifo_vdev_tx_buf_pop(
> +                                             cons, sizeof(u64));
> +                                     size -= sizeof(u64);
> +                             } else {
> +                                     mlxbf_tmfifo_vdev_tx_buf_pop(
> +                                             cons, size);
> +                                     size = 0;
> +                             }
> +                     }
> +                     spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +                     return;
> +             }
> +
> +             /* Get the desc of next packet. */
> +             if (!desc) {
> +                     /* Save the head desc of the chain. */
> +                     vring->desc_head =
> +                             mlxbf_tmfifo_virtio_get_next_desc(vq);
> +                     if (!vring->desc_head) {
> +                             vring->desc = NULL;
> +                             return;
> +                     }
> +                     desc = vring->desc_head;
> +                     vring->desc = desc;
> +
> +                     if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
> +                             struct virtio_net_hdr *net_hdr;
> +
> +                             /* Initialize the packet header. */
> +                             net_hdr = (struct virtio_net_hdr *)
> +                                     phys_to_virt(virtio64_to_cpu(
> +                                             vdev, desc->addr));
> +                             memset(net_hdr, 0, sizeof(*net_hdr));
> +                     }
> +             }
> +
> +             /* Beginning of each packet. */
> +             if (vring->pkt_len == 0) {
> +                     int vdev_id, vring_change = 0;
> +                     union mlxbf_tmfifo_msg_hdr hdr;
> +
> +                     num_avail--;
> +
> +                     /* Read/Write packet length. */
> +                     if (is_rx) {
> +                             hdr.data = readq(fifo->rx_base +
> +                                              MLXBF_TMFIFO_RX_DATA);
> +                             hdr.data = le64_to_cpu(hdr.data);
> +
> +                             /* Skip the length 0 packet (keepalive). */
> +                             if (hdr.len == 0)
> +                                     continue;
> +
> +                             /* Check packet type. */
> +                             if (hdr.type == VIRTIO_ID_NET) {
> +                                     struct virtio_net_config *config;
> +
> +                                     vdev_id = VIRTIO_ID_NET;
> +                                     hdr_len = sizeof(struct virtio_net_hdr);
> +                                     config =
> +                                         &fifo->vdev[vdev_id]->config.net;
> +                                     if (ntohs(hdr.len) > config->mtu +
> +
>        MLXBF_TMFIFO_NET_L2_OVERHEAD)
> +                                             continue;
> +                             } else if (hdr.type == VIRTIO_ID_CONSOLE) {
> +                                     vdev_id = VIRTIO_ID_CONSOLE;
> +                                     hdr_len = 0;
> +                             } else {
> +                                     continue;
> +                             }
> +
> +                             /*
> +                              * Check whether the new packet still belongs
> +                              * to this vring or not. If not, update the
> +                              * pkt_len of the new vring and return.
> +                              */
> +                             if (vdev_id != vring->vdev_id) {
> +                                     struct mlxbf_tmfifo_vdev *dev2 =
> +                                             fifo->vdev[vdev_id];
> +
> +                                     if (!dev2)
> +                                             break;
> +                                     vring->desc = desc;
> +                                     vring =
> +                                       &dev2-
> >vrings[MLXBF_TMFIFO_VRING_RX];
> +                                     vring_change = 1;
> +                             }
> +                             vring->pkt_len = ntohs(hdr.len) + hdr_len;
> +                     } else {
> +                             vring->pkt_len =
> +                                     mlxbf_tmfifo_virtio_get_pkt_len(
> +                                             vdev, desc, vr);
> +
> +                             hdr.data = 0;
> +                             hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
> +                                     VIRTIO_ID_NET :
> +                                     VIRTIO_ID_CONSOLE;
> +                             hdr.len = htons(vring->pkt_len - hdr_len);
> +                             writeq(cpu_to_le64(hdr.data),
> +                                    fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +                     }
> +
> +                     vring->cur_len = hdr_len;
> +                     vring->rem_len = vring->pkt_len;
> +                     fifo->vring[is_rx] = vring;
> +
> +                     if (vring_change)
> +                             return;
> +                     continue;
> +             }
> +
> +             /* Check available space in this desc. */
> +             len = virtio32_to_cpu(vdev, desc->len);
> +             if (len > vring->rem_len)
> +                     len = vring->rem_len;
> +
> +             /* Check if the current desc is already done. */
> +             if (vring->cur_len == len)
> +                     goto check_done;
> +
> +             addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +
> +             /* Read a word from FIFO for Rx. */
> +             if (is_rx) {
> +                     data = readq(fifo->rx_base +
> MLXBF_TMFIFO_RX_DATA);
> +                     data = le64_to_cpu(data);
> +             }
> +
> +             if (vring->cur_len + sizeof(u64) <= len) {
> +                     /* The whole word. */
> +                     if (is_rx) {
> +                             memcpy(addr + vring->cur_len, &data,
> +                                    sizeof(u64));
> +                     } else {
> +                             memcpy(&data, addr + vring->cur_len,
> +                                    sizeof(u64));
> +                     }

Why not just:
Also a few places like this one below.

                        if (is_rx)
                                memcpy(addr + vring->cur_len, &data, sizeof(u64));
                        else
                                memcpy(&data, addr + vring->cur_len, sizeof(u64));

> +                     vring->cur_len += sizeof(u64);
> +             } else {
> +                     /* Leftover bytes. */
> +                     BUG_ON(vring->cur_len > len);
> +                     if (is_rx) {
> +                             memcpy(addr + vring->cur_len, &data,
> +                                    len - vring->cur_len);
> +                     } else {
> +                             memcpy(&data, addr + vring->cur_len,
> +                                    len - vring->cur_len);
> +                     }
> +                     vring->cur_len = len;
> +             }
> +
> +             /* Write the word into FIFO for Tx. */
> +             if (!is_rx) {
> +                     writeq(cpu_to_le64(data),
> +                            fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +             }
> +
> +             num_avail--;
> +
> +check_done:
> +             /* Check whether this desc is full or completed. */
> +             if (vring->cur_len == len) {
> +                     vring->cur_len = 0;
> +                     vring->rem_len -= len;
> +
> +                     /* Get the next desc on the chain. */
> +                     if (vring->rem_len > 0 &&
> +                         (virtio16_to_cpu(vdev, desc->flags) &
> +                                             VRING_DESC_F_NEXT)) {
> +                             idx = virtio16_to_cpu(vdev, desc->next);
> +                             desc = &vr->desc[idx];
> +                             continue;
> +                     }
> +
> +                     /* Done and release the desc. */
> +                     mlxbf_tmfifo_release_pkt(vdev, vring, &desc);
> +                     fifo->vring[is_rx] = NULL;
> +
> +                     /* Notify upper layer that packet is done. */
> +                     spin_lock_irqsave(&fifo->spin_lock, flags);
> +                     vring_interrupt(0, vq);
> +                     spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +                     continue;
> +             }
> +     }
> +
> +     /* Save the current desc. */
> +     vring->desc = desc;
> +}

I suggest splitting mlxbf_tmfifo_virtio_rxtx() into a few small routines.


> +
> +/* The notify function is called when new buffers are posted. */ static
> +bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq) {
> +     struct mlxbf_tmfifo_vring *vring;
> +     struct mlxbf_tmfifo *fifo;
> +     unsigned long flags;
> +
> +     vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> +     fifo = vring->fifo;
> +
> +     /*
> +      * Virtio maintains vrings in pairs, even number ring for Rx
> +      * and odd number ring for Tx.
> +      */
> +     if (!(vring->id & 1)) {
> +             /* Set the RX HWM bit to start Rx. */
> +             if (!test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo-
> >pend_events))
> +                     schedule_work(&fifo->work);
> +     } else {
> +             /*
> +              * Console could make blocking call with interrupts disabled.
> +              * In such case, the vring needs to be served right away. For
> +              * other cases, just set the TX LWM bit to start Tx in the
> +              * worker handler.
> +              */
> +             if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
> +                     spin_lock_irqsave(&fifo->spin_lock, flags);
> +                     mlxbf_tmfifo_console_output(
> +                             fifo->vdev[VIRTIO_ID_CONSOLE], vq);

                        mlxbf_tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE], vq);

> +                     spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +                     schedule_work(&fifo->work);
> +             } else if (!test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
> +                                          &fifo->pend_events))
> +                     schedule_work(&fifo->work);

                if {
                } else if {
                }

For consistency.

> +     }
> +
> +     return true;
> +}
> +
> +/* Work handler for Rx and Tx case. */
> +static void mlxbf_tmfifo_work_handler(struct work_struct *work) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +     struct mlxbf_tmfifo *fifo;
> +     int i;
> +
> +     fifo = container_of(work, struct mlxbf_tmfifo, work);
> +     if (!fifo->is_ready)
> +             return;
> +
> +     mutex_lock(&fifo->lock);
> +
> +     /* Tx (Send data to the TmFifo). */
> +     if (test_and_clear_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events)
> &&
> +                    fifo->irq_info[MLXBF_TM_TX_LWM_IRQ].irq) {
> +             for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {

I suggest to define local variable vq.
And have below:
                                mlxbf_tmfifo_virtio_rxtx(vq, false);

> +                     tm_vdev = fifo->vdev[i];
> +                     if (tm_vdev != NULL) {
> +                             mlxbf_tmfifo_virtio_rxtx(
> +                                 tm_vdev-
> >vrings[MLXBF_TMFIFO_VRING_TX].vq,
> +                                 false);
> +                     }
> +             }
> +     }
> +
> +     /* Rx (Receive data from the TmFifo). */
> +     if (test_and_clear_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events)
> &&
> +                    fifo->irq_info[MLXBF_TM_RX_HWM_IRQ].irq) {
> +             for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
> +                     tm_vdev = fifo->vdev[i];

Same as above.

> +                     if (tm_vdev != NULL) {
> +                             mlxbf_tmfifo_virtio_rxtx(
> +                                 tm_vdev-
> >vrings[MLXBF_TMFIFO_VRING_RX].vq,
> +                                 true);
> +                     }
> +             }
> +     }
> +
> +     mutex_unlock(&fifo->lock);
> +}
> +
> +/* Get the array of feature bits for this device. */ static u64
> +mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +     return tm_vdev->features;
> +}
> +
> +/* Confirm device features to use. */
> +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device
> +*vdev) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +     tm_vdev->features = vdev->features;
> +
> +     return 0;
> +}
> +
> +/* Free virtqueues found by find_vqs(). */ static void
> +mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +     struct mlxbf_tmfifo_vring *vring;
> +     struct virtqueue *vq;
> +     int i;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +     for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +             vring = &tm_vdev->vrings[i];
> +
> +             /* Release the pending packet. */
> +             if (vring->desc != NULL) {
> +                     mlxbf_tmfifo_release_pkt(&tm_vdev->vdev, vring,
> +                                              &vring->desc);
> +             }
> +
> +             vq = vring->vq;
> +             if (vq) {
> +                     vring->vq = NULL;
> +                     vring_del_virtqueue(vq);
> +             }
> +     }
> +}
> +
> +/* Create and initialize the virtual queues. */ static int
> +mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
> +                                     unsigned int nvqs,
> +                                     struct virtqueue *vqs[],
> +                                     vq_callback_t *callbacks[],
> +                                     const char * const names[],
> +                                     const bool *ctx,
> +                                     struct irq_affinity *desc)
> +{
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +     struct mlxbf_tmfifo_vring *vring;
> +     int i, ret = -EINVAL, size;

Don't initialize ret with -EINVAL.

> +     struct virtqueue *vq;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +     if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
> +             return -EINVAL;
> +
> +     for (i = 0; i < nvqs; ++i) {
> +             if (!names[i])
> +                     goto error;
> +             vring = &tm_vdev->vrings[i];
> +
> +             /* zero vring */
> +             size = vring_size(vring->size, vring->align);
> +             memset(vring->va, 0, size);
> +             vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
> +                                      false, false, vring->va,
> +                                      mlxbf_tmfifo_virtio_notify,
> +                                      callbacks[i], names[i]);
> +             if (!vq) {
> +                     dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
> +                     ret = -ENOMEM;
> +                     goto error;
> +             }
> +
> +             vqs[i] = vq;
> +             vring->vq = vq;
> +             vq->priv = vring;
> +     }
> +
> +     return 0;
> +
> +error:
> +     mlxbf_tmfifo_virtio_del_vqs(vdev);
> +     return ret;
> +}
> +
> +/* Read the status byte. */
> +static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +     return tm_vdev->status;
> +}
> +
> +/* Write the status byte. */
> +static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
> +                                        u8 status)
> +{
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +     tm_vdev->status = status;
> +}
> +
> +/* Reset the device. Not much here for now. */ static void
> +mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +     tm_vdev->status = 0;
> +}
> +
> +/* Read the value of a configuration field. */ static void
> +mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
> +                           unsigned int offset,
> +                           void *buf,
> +                           unsigned int len)
> +{
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +

        unsigned int pos = offset + len;

        if (pos > sizeof(tm_vdev->config) || pos < len)


> +     if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
> +             dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
> +             return;
> +     }
> +
> +     memcpy(buf, (u8 *)&tm_vdev->config + offset, len); }
> +
> +/* Write the value of a configuration field. */ static void
> +mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
> +                              unsigned int offset,
> +                              const void *buf,
> +                              unsigned int len)
> +{
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +
> +     if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {

Same as above.

> +             dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
> +             return;
> +     }
> +
> +     memcpy((u8 *)&tm_vdev->config + offset, buf, len); }
> +
> +/* Virtio config operations. */
> +static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
> +     .get_features = mlxbf_tmfifo_virtio_get_features,
> +     .finalize_features = mlxbf_tmfifo_virtio_finalize_features,
> +     .find_vqs = mlxbf_tmfifo_virtio_find_vqs,
> +     .del_vqs = mlxbf_tmfifo_virtio_del_vqs,
> +     .reset = mlxbf_tmfifo_virtio_reset,
> +     .set_status = mlxbf_tmfifo_virtio_set_status,
> +     .get_status = mlxbf_tmfifo_virtio_get_status,
> +     .get = mlxbf_tmfifo_virtio_get,
> +     .set = mlxbf_tmfifo_virtio_set,
> +};
> +
> +/* Create vdev type in a tmfifo. */
> +int mlxbf_tmfifo_create_vdev(struct mlxbf_tmfifo *fifo, int vdev_id,
> +                          u64 features, void *config, u32 size) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +     int ret = 0;
> +
> +     mutex_lock(&fifo->lock);
> +
> +     tm_vdev = fifo->vdev[vdev_id];
> +     if (tm_vdev != NULL) {
> +             pr_err("vdev %d already exists\n", vdev_id);
> +             ret = -EEXIST;
> +             goto already_exist;
> +     }
> +
> +     tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
> +     if (!tm_vdev) {
> +             ret = -ENOMEM;
> +             goto already_exist;
> +     }
> +
> +     tm_vdev->vdev.id.device = vdev_id;
> +     tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> +     tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> +     tm_vdev->vdev.dev.release = mlxbf_tmfifo_virtio_dev_release;
> +     tm_vdev->features = features;
> +     if (config)
> +             memcpy(&tm_vdev->config, config, size);
> +     if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> +             pr_err("Unable to allocate vring\n");
> +             ret = -ENOMEM;
> +             goto alloc_vring_fail;
> +     }
> +     if (vdev_id == VIRTIO_ID_CONSOLE) {
> +             tm_vdev->tx_buf =
> kmalloc(MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> +                                       GFP_KERNEL);
> +     }
> +     fifo->vdev[vdev_id] = tm_vdev;
> +
> +     /* Register the virtio device. */
> +     ret = register_virtio_device(&tm_vdev->vdev);
> +     if (ret) {
> +             dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
> +             goto register_fail;
> +     }
> +
> +     mutex_unlock(&fifo->lock);
> +     return 0;
> +
> +register_fail:
> +     mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +     fifo->vdev[vdev_id] = NULL;
> +alloc_vring_fail:
> +     kfree(tm_vdev);
> +already_exist:
> +     mutex_unlock(&fifo->lock);
> +     return ret;
> +}
> +
> +/* Delete vdev type from a tmfifo. */
> +int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id) {
> +     struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +     mutex_lock(&fifo->lock);
> +
> +     /* Unregister vdev. */
> +     tm_vdev = fifo->vdev[vdev_id];
> +     if (tm_vdev) {
> +             unregister_virtio_device(&tm_vdev->vdev);
> +             mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +             kfree(tm_vdev->tx_buf);
> +             kfree(tm_vdev);
> +             fifo->vdev[vdev_id] = NULL;
> +     }
> +
> +     mutex_unlock(&fifo->lock);
> +
> +     return 0;
> +}
> +
> +/* Device remove function. */
> +static int mlxbf_tmfifo_remove(struct platform_device *pdev) {

Locate it after probe.
If you'll use all devm_, like Andy noted:
devm_ioremap
devm_ioremap_resource
devm_kzalloc
devm_request_mem_region
you can drop all kfree, release_mem_region, iounmap

And make the below a separate routine, something like
mlxbf_tmfifo_cleanup(), if you still need it.

> +     struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> +     struct resource *rx_res, *tx_res;
> +     int i;
> +
> +     if (fifo) {
> +             mutex_lock(&mlxbf_tmfifo_lock);
> +
> +             fifo->is_ready = false;
> +
> +             /* Stop the timer. */
> +             del_timer_sync(&fifo->timer);
> +
> +             /* Release interrupts. */
> +             mlxbf_tmfifo_free_irqs(fifo);
> +
> +             /* Cancel the pending work. */
> +             cancel_work_sync(&fifo->work);
> +
> +             for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
> +                     mlxbf_tmfifo_delete_vdev(fifo, i);
> +
> +             /* Release IO resources. */
> +             if (fifo->rx_base)
> +                     iounmap(fifo->rx_base);
> +             if (fifo->tx_base)
> +                     iounmap(fifo->tx_base);
> +
> +             platform_set_drvdata(pdev, NULL);
> +             kfree(fifo);
> +
> +             mutex_unlock(&mlxbf_tmfifo_lock);
> +     }
> +
> +     rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +     if (rx_res)
> +             release_mem_region(rx_res->start, resource_size(rx_res));
> +     tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +     if (tx_res)
> +             release_mem_region(tx_res->start, resource_size(tx_res));
> +
> +     return 0;
> +}
> +
> +/* Read the configured network MAC address from efi variable. */ static
> +void mlxbf_tmfifo_get_cfg_mac(u8 *mac) {
> +     efi_char16_t name[] = {
> +             'R', 's', 'h', 'i', 'm', 'M', 'a', 'c', 'A', 'd', 'd', 'r', 0 };


Could it be moved out and set like:
static const efi_char16_t mlxbf_tmfifo_efi_name[] = "...";
Could you check if there are some examples in the kernel, please?

> +     efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +     efi_status_t status;
> +     unsigned long size;
> +     u8 buf[6];
> +
> +     size = sizeof(buf);
> +     status = efi.get_variable(name, &guid, NULL, &size, buf);
> +     if (status == EFI_SUCCESS && size == sizeof(buf))
> +             memcpy(mac, buf, sizeof(buf));
> +}
> +
> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev) {
> +     struct virtio_net_config net_config;
> +     struct resource *rx_res, *tx_res;
> +     struct mlxbf_tmfifo *fifo;
> +     int i, ret;
> +     u64 ctl;
> +
> +     /* Get the resource of the Rx & Tx FIFO. */
> +     rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +     tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +     if (!rx_res || !tx_res) {
> +             ret = -EINVAL;
> +             goto err;
> +     }
> +
> +     if (request_mem_region(rx_res->start,
> +                            resource_size(rx_res), "bf-tmfifo") == NULL) {
> +             ret = -EBUSY;
> +             goto early_err;
> +     }
> +
> +     if (request_mem_region(tx_res->start,
> +                            resource_size(tx_res), "bf-tmfifo") == NULL) {
> +             release_mem_region(rx_res->start, resource_size(rx_res));
> +             ret = -EBUSY;
> +             goto early_err;
> +     }
> +
> +     ret = -ENOMEM;
> +     fifo = kzalloc(sizeof(struct mlxbf_tmfifo), GFP_KERNEL);
> +     if (!fifo)
> +             goto err;
> +
> +     fifo->pdev = pdev;
> +     platform_set_drvdata(pdev, fifo);
> +
> +     spin_lock_init(&fifo->spin_lock);
> +     INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> +
> +     timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> +     fifo->timer.function = mlxbf_tmfifo_timer;
> +
> +     for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +             fifo->irq_info[i].index = i;
> +             fifo->irq_info[i].fifo = fifo;
> +             fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +             ret = request_irq(fifo->irq_info[i].irq,
> +                               mlxbf_tmfifo_irq_handler, 0,
> +                               "tmfifo", &fifo->irq_info[i]);
> +             if (ret) {
> +                     pr_err("Unable to request irq\n");
> +                     fifo->irq_info[i].irq = 0;
> +                     goto err;
> +             }
> +     }
> +
> +     fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
> +     if (!fifo->rx_base)
> +             goto err;
> +
> +     fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
> +     if (!fifo->tx_base)
> +             goto err;
> +
> +     /* Get Tx FIFO size and set the low/high watermark. */
> +     ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +     fifo->tx_fifo_size =
> +             FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK,
> ctl);
> +     ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
> +             FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
> +                        fifo->tx_fifo_size / 2);
> +     ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
> +             FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
> +                        fifo->tx_fifo_size - 1);
> +     writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +
> +     /* Get Rx FIFO size and set the low/high watermark. */
> +     ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +     fifo->rx_fifo_size =
> +             FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK,
> ctl);
> +     ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
> +             FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
> +     ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
> +             FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
> +     writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +
> +     mutex_init(&fifo->lock);
> +
> +     /* Create the console vdev. */
> +     ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +     if (ret)
> +             goto err;
> +
> +     /* Create the network vdev. */
> +     memset(&net_config, 0, sizeof(net_config));
> +     net_config.mtu = MLXBF_TMFIFO_NET_MTU;
> +     net_config.status = VIRTIO_NET_S_LINK_UP;
> +     memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
> +     mlxbf_tmfifo_get_cfg_mac(net_config.mac);
> +     ret = mlxbf_tmfifo_create_vdev(fifo, VIRTIO_ID_NET,
> +             MLXBF_TMFIFO_NET_FEATURES, &net_config,
> sizeof(net_config));
> +     if (ret)
> +             goto err;
> +
> +     mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> +
> +     fifo->is_ready = true;
> +
> +     return 0;
> +
> +err:
> +     mlxbf_tmfifo_remove(pdev);
> +early_err:
> +     dev_err(&pdev->dev, "Probe Failed\n");
> +     return ret;
> +}
> +
> +static const struct of_device_id mlxbf_tmfifo_match[] = {
> +     { .compatible = "mellanox,bf-tmfifo" },
> +     {},
> +};
> +MODULE_DEVICE_TABLE(of, mlxbf_tmfifo_match);
> +
> +static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
> +     { "MLNXBF01", 0 },
> +     {},
> +};
> +MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
> +
> +static struct platform_driver mlxbf_tmfifo_driver = {
> +     .probe = mlxbf_tmfifo_probe,
> +     .remove = mlxbf_tmfifo_remove,
> +     .driver = {
> +             .name = "bf-tmfifo",
> +             .of_match_table = mlxbf_tmfifo_match,
> +             .acpi_match_table = ACPI_PTR(mlxbf_tmfifo_acpi_match),
> +     },
> +};
> +
> +module_platform_driver(mlxbf_tmfifo_driver);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
> +MODULE_LICENSE("GPL"); MODULE_AUTHOR("Mellanox Technologies");
> --
> 1.8.3.1


[-- Attachment #2: Type: text/html, Size: 142927 bytes --]

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-29 22:06   ` Andy Shevchenko
  2019-02-13 13:34     ` Liming Sun
@ 2019-02-13 16:33     ` Liming Sun
  1 sibling, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-02-13 16:33 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Rob Herring, Mark Rutland, Arnd Bergmann, David Woods,
	Andy Shevchenko, Darren Hart, Vadim Pasternak, devicetree,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy. Sorry I had email issue earlier today. Not sure the reply was sent out or not. So sent another one just in case...

v9 of this patch has been posted to address the 'devm_' comment. It also has the coding-style changes according to the comments I got for another patch.

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Tuesday, January 29, 2019 5:07 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Rob Herring <robh+dt@kernel.org>; Mark Rutland <mark.rutland@arm.com>; Arnd Bergmann <arnd@arndb.de>; David Woods
> <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim Pasternak
> <vadimp@mellanox.com>; devicetree <devicetree@vger.kernel.org>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>;
> Platform Driver <platform-driver-x86@vger.kernel.org>
> Subject: Re: [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Mon, Jan 28, 2019 at 7:28 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> 
> Please, go through this series taking into account review I just did
> for your another patch.
> 
> On top of that, see recent (for few years I think) drivers what modern
> APIs they are using, e.g. devm_.
> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-02-13 13:27 ` [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2019-02-13 18:11   ` Andy Shevchenko
  2019-02-13 18:34     ` Liming Sun
                       ` (2 more replies)
  0 siblings, 3 replies; 179+ messages in thread
From: Andy Shevchenko @ 2019-02-13 18:11 UTC (permalink / raw)
  To: Liming Sun
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

On Wed, Feb 13, 2019 at 3:27 PM Liming Sun <lsun@mellanox.com> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.

Thanks for an update, my comments below.

Again, to Mellanox: guys, please, establish internal mailing list for
review and don't come with such quality of code.

Next time I would like to see Reviewed-by from Mellanox people I know,
like Vadim or Leon.

> +config MLXBF_TMFIFO
> +       tristate "Mellanox BlueField SoC TmFifo platform driver"

> +       depends on ARM64 && ACPI && VIRTIO_CONSOLE && VIRTIO_NET

Split this to three logical parts.

> +       help
> +         Say y here to enable TmFifo support. The TmFifo driver provides
> +          platform driver support for the TmFifo which supports console
> +          and networking based on the virtio framework.

>  obj-$(CONFIG_MLXREG_HOTPLUG)   += mlxreg-hotplug.o
>  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> +obj-$(CONFIG_MLXBF_TMFIFO)     += mlxbf-tmfifo.o

I would suggest to keep it sorted.

> +#define MLXBF_TMFIFO_TX_DATA 0x0

I suggest to use same fixed format for offsets.
Here, for example, 0x00 would be better.

> +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff

#include <linux/bits.h>
...
GENMASK()

> +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff
> +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff

> +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff
> +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00

> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL

GENMASK() / GENMASK_ULL()

> +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff

GENMASK()

> +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff
> +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff

> +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff
> +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00

> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL

Ditto.

> +#include <linux/acpi.h>
> +#include <linux/byteorder/generic.h>
> +#include <linux/bitfield.h>
> +#include <linux/cache.h>
> +#include <linux/device.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/efi.h>
> +#include <linux/io.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/kernel.h>
> +#include <linux/math64.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/resource.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/version.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>

Do you need all of them?

> +#define MLXBF_TMFIFO_VRING_SIZE                        1024

SZ_1K ?

> +/* Console Tx buffer size. */
> +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE          (32 * 1024)

SZ_32K ?

> +/* House-keeping timer interval. */
> +static int mlxbf_tmfifo_timer_interval = HZ / 10;

> +/* Global lock. */

Noise. Either explain what it protects, or remove.

> +static DEFINE_MUTEX(mlxbf_tmfifo_lock);

> +/* Struct declaration. */

Noise.

> +/* Structure to maintain the ring state. */
> +struct mlxbf_tmfifo_vring {
> +       void *va;                       /* virtual address */
> +       dma_addr_t dma;                 /* dma address */
> +       struct virtqueue *vq;           /* virtqueue pointer */
> +       struct vring_desc *desc;        /* current desc */
> +       struct vring_desc *desc_head;   /* current desc head */
> +       int cur_len;                    /* processed len in current desc */
> +       int rem_len;                    /* remaining length to be processed */
> +       int size;                       /* vring size */
> +       int align;                      /* vring alignment */
> +       int id;                         /* vring id */
> +       int vdev_id;                    /* TMFIFO_VDEV_xxx */
> +       u32 pkt_len;                    /* packet total length */
> +       u16 next_avail;                 /* next avail desc id */
> +       struct mlxbf_tmfifo *fifo;      /* pointer back to the tmfifo */
> +};

Perhaps kernel-doc?

> +/* Interrupt types. */
> +enum {
> +       MLXBF_TM_RX_LWM_IRQ,            /* Rx low water mark irq */
> +       MLXBF_TM_RX_HWM_IRQ,            /* Rx high water mark irq */
> +       MLXBF_TM_TX_LWM_IRQ,            /* Tx low water mark irq */
> +       MLXBF_TM_TX_HWM_IRQ,            /* Tx high water mark irq */
> +       MLXBF_TM_IRQ_CNT

CNT...

> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +       MLXBF_TMFIFO_VRING_RX,          /* Rx ring */
> +       MLXBF_TMFIFO_VRING_TX,          /* Tx ring */
> +       MLXBF_TMFIFO_VRING_NUM

...NUM

Perhaps one style for max numbers?

> +};

> +
> +/* Structure for the virtual device. */
> +struct mlxbf_tmfifo_vdev {
> +       struct virtio_device vdev;      /* virtual device */
> +       u8 status;
> +       u64 features;
> +       union {                         /* virtio config space */
> +               struct virtio_console_config cons;
> +               struct virtio_net_config net;
> +       } config;

Describe which field allows one to distinguish what type of data is in the union.

> +       struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> +       u8 *tx_buf;                     /* tx buffer */
> +       u32 tx_head;                    /* tx buffer head */
> +       u32 tx_tail;                    /* tx buffer tail */
> +};

kernel-doc?

> +/* Structure of the interrupt information. */
> +struct mlxbf_tmfifo_irq_info {
> +       struct mlxbf_tmfifo *fifo;      /* tmfifo structure */
> +       int irq;                        /* interrupt number */
> +       int index;                      /* array index */
> +};

Ditto.

> +
> +/* Structure of the TmFifo information. */
> +struct mlxbf_tmfifo {
> +       struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /* devices */
> +       struct platform_device *pdev;   /* platform device */
> +       struct mutex lock;              /* fifo lock */
> +       void __iomem *rx_base;          /* mapped register base */
> +       void __iomem *tx_base;          /* mapped register base */
> +       int tx_fifo_size;               /* number of entries of the Tx FIFO */
> +       int rx_fifo_size;               /* number of entries of the Rx FIFO */
> +       unsigned long pend_events;      /* pending bits for deferred process */
> +       struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info */
> +       struct work_struct work;        /* work struct for deferred process */
> +       struct timer_list timer;        /* keepalive timer */
> +       struct mlxbf_tmfifo_vring *vring[2];    /* current Tx/Rx ring */
> +       bool is_ready;                  /* ready flag */
> +       spinlock_t spin_lock;           /* spin lock */
> +};

Ditto.

> +/* Use a union struction for 64-bit little/big endian. */

What does this mean?

> +union mlxbf_tmfifo_data_64bit {
> +       u64 data;
> +       __le64 data_le;
> +};
> +
> +/* Message header used to demux data in the TmFifo. */
> +union mlxbf_tmfifo_msg_hdr {
> +       struct {
> +               u8 type;                /* message type */
> +               __be16 len;             /* payload length */
> +               u8 unused[5];           /* reserved, set to 0 */
> +       } __packed;

It's already packed. No?

> +       union mlxbf_tmfifo_data_64bit u;        /* 64-bit data */
> +};

> +/* MTU setting of the virtio-net interface. */
> +#define MLXBF_TMFIFO_NET_MTU           1500

Don't we have this globally defined?

> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES      ((1UL << VIRTIO_NET_F_MTU) | \
> +                                        (1UL << VIRTIO_NET_F_STATUS) | \
> +                                        (1UL << VIRTIO_NET_F_MAC))

BIT_UL() ?

> +/* Function declarations. */

Noise.

> +static int mlxbf_tmfifo_remove(struct platform_device *pdev);

Why do you need forward declaration for this?

> +/* Return the consumed Tx buffer space. */
> +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev)
> +{
> +       return ((vdev->tx_tail >= vdev->tx_head) ?
> +               (vdev->tx_tail - vdev->tx_head) :
> +               (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> +                vdev->tx_tail));

Split this for better reading.

> +}
> +
> +/* Return the available Tx buffer space. */
> +static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev)
> +{
> +       return (MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE -
> +               mlxbf_tmfifo_vdev_tx_buf_len(vdev));

Redundant parens.
Moreover, you might consider a temporary variable for better readability.

> +}
> +
> +/* Update Rx/Tx buffer index pointer. */
> +static void mlxbf_tmfifo_vdev_tx_buf_index_inc(u32 *index, u32 len)
> +{
> +       *index += len;
> +       if (*index >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> +               *index -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
> +}
> +
> +/* Allocate vrings for the fifo. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +                                    struct mlxbf_tmfifo_vdev *tm_vdev,
> +                                    int vdev_id)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       dma_addr_t dma;
> +       int i, size;
> +       void *va;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +               vring->fifo = fifo;
> +               vring->size = MLXBF_TMFIFO_VRING_SIZE;
> +               vring->align = SMP_CACHE_BYTES;
> +               vring->id = i;
> +               vring->vdev_id = vdev_id;
> +

> +               size = PAGE_ALIGN(vring_size(vring->size, vring->align));

Why do you need this?
dma_alloc_coherent() allocates memory on page granularity anyway.

> +               va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
> +                                       GFP_KERNEL);
> +               if (!va) {

> +                       dev_err(tm_vdev->vdev.dev.parent,

This would be much easier if you had a temporary variable for this device.

> +                               "dma_alloc_coherent failed\n");
> +                       return -ENOMEM;
> +               }
> +
> +               vring->va = va;
> +               vring->dma = dma;
> +       }
> +
> +       return 0;
> +}

> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> +{
> +       struct mlxbf_tmfifo_irq_info *irq_info;
> +
> +       irq_info = (struct mlxbf_tmfifo_irq_info *)arg;

Useless casting.
Assignment can be done in definition block.

> +       if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> +           !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> +               schedule_work(&irq_info->fifo->work);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/* Get the next packet descriptor from the vring. */
> +static struct vring_desc *mlxbf_tmfifo_get_next_desc(struct virtqueue *vq)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       unsigned int idx, head;
> +       struct vring *vr;
> +
> +       vr = (struct vring *)virtqueue_get_vring(vq);

Return type is different? Is it safe to cast? Why?

> +       if (!vr)
> +               return NULL;

+ blank line

> +       vring = (struct mlxbf_tmfifo_vring *)vq->priv;

Do you need explicit casting?

> +       if (vring->next_avail == virtio16_to_cpu(vq->vdev, vr->avail->idx))
> +               return NULL;

+blank line

> +       idx = vring->next_avail % vr->num;
> +       head = virtio16_to_cpu(vq->vdev, vr->avail->ring[idx]);
> +       if (WARN_ON(head >= vr->num))
> +               return NULL;
> +       vring->next_avail++;
> +
> +       return &vr->desc[head];
> +}
> +
> +/* Release virtio descriptor. */
> +static void mlxbf_tmfifo_release_desc(struct virtio_device *vdev,
> +                                     struct vring *vr, struct vring_desc *desc,
> +                                     u32 len)
> +{
> +       u16 idx, vr_idx;
> +
> +       vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
> +       idx = vr_idx % vr->num;
> +       vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
> +       vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> +
> +       /* Virtio could poll and check the 'idx' to decide
> +        * whether the desc is done or not. Add a memory
> +        * barrier here to make sure the update above completes
> +        * before updating the idx.
> +        */

Multi-line comment style is broken.

> +       mb();
> +       vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
> +}

> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> +{
> +       struct mlxbf_tmfifo *fifo;

> +       fifo = container_of(arg, struct mlxbf_tmfifo, timer);

Can't be done in the definition block?

> +       /*
> +        * Wake up the work handler to poll the Rx FIFO in case interrupt
> +        * missing or any leftover bytes stuck in the FIFO.
> +        */
> +       test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);

How do you utilize the test result here?

> +
> +       /*
> +        * Wake up Tx handler in case virtio has queued too many packets
> +        * and are waiting for buffer return.
> +        */
> +       test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);

Ditto.

> +
> +       schedule_work(&fifo->work);
> +
> +       mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> +}

> +       /* Adjust the size to available space. */
> +       if (size + sizeof(hdr) > avail * sizeof(u64))
> +               size = avail * sizeof(u64) - sizeof(hdr);

Can avail be 0?

> +       /* Write header. */
> +       hdr.u.data = 0;
> +       hdr.type = VIRTIO_ID_CONSOLE;
> +       hdr.len = htons(size);
> +       hdr.u.data_le = cpu_to_le64(hdr.u.data);

> +       writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);

So, this one is not protected anyhow? Potential race condition?

> +
> +       spin_lock_irqsave(&fifo->spin_lock, flags);
> +
> +       while (size > 0) {
> +               addr = cons->tx_buf + cons->tx_head;
> +
> +               if (cons->tx_head + sizeof(u64) <=
> +                   MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> +                       memcpy(&data, addr, sizeof(u64));
> +               } else {
> +                       partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> +                       memcpy(&data, addr, partial);
> +                       memcpy((u8 *)&data + partial, cons->tx_buf,
> +                              sizeof(u64) - partial);
> +               }
> +               writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +               if (size >= sizeof(u64)) {
> +                       mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> +                                                          sizeof(u64));
> +                       size -= sizeof(u64);
> +               } else {
> +                       mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> +                                                          size);
> +                       size = 0;
> +               }
> +       }
> +
> +       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +}

> +       /* Rx/Tx one word (8 bytes) if not done. */
> +       if (vring->cur_len != len)
> +               mlxbf_tmfifo_rxtx_word(fifo, vdev, vring, desc, is_rx, avail,
> +                                      len);

In such a case it's better to keep it on one line.

> +/* Get the array of feature bits for this device. */
> +static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +       tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> +       return tm_vdev->features;
> +}
> +
> +/* Confirm device features to use. */
> +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +

> +       tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);

This is a candidate to be a macro

#define mlxbt_vdev_to_tmfifo(...) ...

> +       tm_vdev->features = vdev->features;
> +
> +       return 0;
> +}

> +/* Create vdev type in a tmfifo. */
> +static int mlxbf_tmfifo_create_vdev(struct device *dev,
> +                                   struct mlxbf_tmfifo *fifo,
> +                                   int vdev_id, u64 features,
> +                                   void *config, u32 size)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +       int ret = 0;
> +
> +       mutex_lock(&fifo->lock);
> +
> +       tm_vdev = fifo->vdev[vdev_id];
> +       if (tm_vdev) {
> +               dev_err(dev, "vdev %d already exists\n", vdev_id);
> +               ret = -EEXIST;
> +               goto fail;
> +       }
> +
> +       tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
> +       if (!tm_vdev) {
> +               ret = -ENOMEM;
> +               goto fail;
> +       }
> +
> +       tm_vdev->vdev.id.device = vdev_id;
> +       tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> +       tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> +       tm_vdev->features = features;
> +       if (config)
> +               memcpy(&tm_vdev->config, config, size);
> +       if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> +               dev_err(dev, "unable to allocate vring\n");
> +               ret = -ENOMEM;
> +               goto fail;
> +       }
> +       if (vdev_id == VIRTIO_ID_CONSOLE)

> +               tm_vdev->tx_buf = devm_kmalloc(dev,
> +                                              MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> +                                              GFP_KERNEL);

Are you sure devm_ suits here?

> +       fifo->vdev[vdev_id] = tm_vdev;
> +
> +       /* Register the virtio device. */
> +       ret = register_virtio_device(&tm_vdev->vdev);
> +       if (ret) {
> +               dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
> +               goto register_fail;
> +       }
> +
> +       mutex_unlock(&fifo->lock);
> +       return 0;
> +
> +register_fail:
> +       mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> +       fifo->vdev[vdev_id] = NULL;
> +fail:
> +       mutex_unlock(&fifo->lock);
> +       return ret;
> +}

> +/* Read the configured network MAC address from efi variable. */
> +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> +{
> +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +       efi_status_t status;
> +       unsigned long size;
> +       u8 buf[6];
> +
> +       size = sizeof(buf);
> +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> +                                 buf);
> +       if (status == EFI_SUCCESS && size == sizeof(buf))
> +               memcpy(mac, buf, sizeof(buf));
> +}

Shouldn't this rather be a helper in the kernel's EFI library?

> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> +{
> +       struct virtio_net_config net_config;
> +       struct resource *rx_res, *tx_res;
> +       struct mlxbf_tmfifo *fifo;
> +       int i, ret;
> +
> +       /* Get the resource of the Rx FIFO. */
> +       rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +       if (!rx_res)
> +               return -ENODEV;
> +
> +       /* Get the resource of the Tx FIFO. */
> +       tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +       if (!tx_res)
> +               return -ENODEV;
> +
> +       if (!devm_request_mem_region(&pdev->dev, rx_res->start,
> +                                    resource_size(rx_res), "bf-tmfifo"))
> +               return -EBUSY;
> +
> +       if (!devm_request_mem_region(&pdev->dev, tx_res->start,
> +                                    resource_size(tx_res), "bf-tmfifo"))
> +               return -EBUSY;
> +
> +       fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
> +       if (!fifo)
> +               return -ENOMEM;
> +
> +       fifo->pdev = pdev;
> +       platform_set_drvdata(pdev, fifo);
> +
> +       spin_lock_init(&fifo->spin_lock);
> +       INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> +
> +       timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> +
> +       for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> +               fifo->irq_info[i].index = i;
> +               fifo->irq_info[i].fifo = fifo;

> +               fifo->irq_info[i].irq = platform_get_irq(pdev, i);


> +               ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
> +                                      mlxbf_tmfifo_irq_handler, 0,
> +                                      "tmfifo", &fifo->irq_info[i]);
> +               if (ret) {
> +                       dev_err(&pdev->dev, "devm_request_irq failed\n");
> +                       fifo->irq_info[i].irq = 0;
> +                       return ret;
> +               }
> +       }
> +

> +       fifo->rx_base = devm_ioremap(&pdev->dev, rx_res->start,
> +                                    resource_size(rx_res));
> +       if (!fifo->rx_base)
> +               return -ENOMEM;
> +
> +       fifo->tx_base = devm_ioremap(&pdev->dev, tx_res->start,
> +                                    resource_size(tx_res));
> +       if (!fifo->tx_base)
> +               return -ENOMEM;

Switch to devm_ioremap_resource().
However, I think you probably need memremap().

> +       mutex_init(&fifo->lock);

Isn't it too late to initialize this one?


> +/* Device remove function. */
> +static int mlxbf_tmfifo_remove(struct platform_device *pdev)
> +{
> +       struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> +

> +       if (fifo)

How is it possible for this to be false?

> +               mlxbf_tmfifo_cleanup(fifo);
> +

> +       platform_set_drvdata(pdev, NULL);

Redundant.

> +
> +       return 0;
> +}

> +MODULE_LICENSE("GPL");

Is it correct?

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-02-13 18:11   ` Andy Shevchenko
@ 2019-02-13 18:34     ` Liming Sun
  2019-02-14 16:25     ` Liming Sun
  2019-02-28 15:51     ` Liming Sun
  2 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-02-13 18:34 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy. We actually had some internal reviews, as you mentioned.
I'll try to address the comments and add the 'Reviewed-by' tags in the next revision.

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Wednesday, February 13, 2019 1:11 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Wed, Feb 13, 2019 at 3:27 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
> 
> Thanks for an update, my comments below.
> 
> Again, to Mellanox: guys, please, establish internal mailing list for
> review and don't come with such quality of code.
> 
> Next time I would like to see Reviewed-by from Mellanox people I know,
> like Vadim or Leon.
> 
> > +config MLXBF_TMFIFO
> > +       tristate "Mellanox BlueField SoC TmFifo platform driver"
> 
> > +       depends on ARM64 && ACPI && VIRTIO_CONSOLE && VIRTIO_NET
> 
> Split this to three logical parts.
> 
> > +       help
> > +         Say y here to enable TmFifo support. The TmFifo driver provides
> > +          platform driver support for the TmFifo which supports console
> > +          and networking based on the virtio framework.
> 
> >  obj-$(CONFIG_MLXREG_HOTPLUG)   += mlxreg-hotplug.o
> >  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> > +obj-$(CONFIG_MLXBF_TMFIFO)     += mlxbf-tmfifo.o
> 
> I would suggest to keep it sorted.
> 
> > +#define MLXBF_TMFIFO_TX_DATA 0x0
> 
> I suggest to use same fixed format for offsets.
> Here, for example, 0x00 would be better.
> 
> > +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
> 
> #include <linux/bits.h>
> ...
> GENMASK()
> 
> > +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff
> 
> > +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00
> 
> > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> 
> GENMASK() / GENMASK_ULL()
> 
> > +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
> 
> GENMASK()
> 
> > +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff
> 
> > +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00
> 
> > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> 
> Ditto.
> 
> > +#include <linux/acpi.h>
> > +#include <linux/byteorder/generic.h>
> > +#include <linux/bitfield.h>
> > +#include <linux/cache.h>
> > +#include <linux/device.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/efi.h>
> > +#include <linux/io.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/irq.h>
> > +#include <linux/kernel.h>
> > +#include <linux/math64.h>
> > +#include <linux/module.h>
> > +#include <linux/moduleparam.h>
> > +#include <linux/mutex.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/resource.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +#include <linux/version.h>
> > +#include <linux/virtio.h>
> > +#include <linux/virtio_config.h>
> > +#include <linux/virtio_console.h>
> > +#include <linux/virtio_ids.h>
> > +#include <linux/virtio_net.h>
> > +#include <linux/virtio_ring.h>
> 
> Do you need all of them?
> 
> > +#define MLXBF_TMFIFO_VRING_SIZE                        1024
> 
> SZ_1K ?
> 
> > +/* Console Tx buffer size. */
> > +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE          (32 * 1024)
> 
> SZ_32K ?
> 
> > +/* House-keeping timer interval. */
> > +static int mlxbf_tmfifo_timer_interval = HZ / 10;
> 
> > +/* Global lock. */
> 
> Noise. Either explain what it protects, or remove.
> 
> > +static DEFINE_MUTEX(mlxbf_tmfifo_lock);
> 
> > +/* Struct declaration. */
> 
> Noise.
> 
> > +/* Structure to maintain the ring state. */
> > +struct mlxbf_tmfifo_vring {
> > +       void *va;                       /* virtual address */
> > +       dma_addr_t dma;                 /* dma address */
> > +       struct virtqueue *vq;           /* virtqueue pointer */
> > +       struct vring_desc *desc;        /* current desc */
> > +       struct vring_desc *desc_head;   /* current desc head */
> > +       int cur_len;                    /* processed len in current desc */
> > +       int rem_len;                    /* remaining length to be processed */
> > +       int size;                       /* vring size */
> > +       int align;                      /* vring alignment */
> > +       int id;                         /* vring id */
> > +       int vdev_id;                    /* TMFIFO_VDEV_xxx */
> > +       u32 pkt_len;                    /* packet total length */
> > +       u16 next_avail;                 /* next avail desc id */
> > +       struct mlxbf_tmfifo *fifo;      /* pointer back to the tmfifo */
> > +};
> 
> Perhaps kernel-doc?
> 
> > +/* Interrupt types. */
> > +enum {
> > +       MLXBF_TM_RX_LWM_IRQ,            /* Rx low water mark irq */
> > +       MLXBF_TM_RX_HWM_IRQ,            /* Rx high water mark irq */
> > +       MLXBF_TM_TX_LWM_IRQ,            /* Tx low water mark irq */
> > +       MLXBF_TM_TX_HWM_IRQ,            /* Tx high water mark irq */
> > +       MLXBF_TM_IRQ_CNT
> 
> CNT...
> 
> > +};
> > +
> > +/* Ring types (Rx & Tx). */
> > +enum {
> > +       MLXBF_TMFIFO_VRING_RX,          /* Rx ring */
> > +       MLXBF_TMFIFO_VRING_TX,          /* Tx ring */
> > +       MLXBF_TMFIFO_VRING_NUM
> 
> ...NUM
> 
> Perhaps one style for max numbers?
> 
> > +};
> 
> > +
> > +/* Structure for the virtual device. */
> > +struct mlxbf_tmfifo_vdev {
> > +       struct virtio_device vdev;      /* virtual device */
> > +       u8 status;
> > +       u64 features;
> > +       union {                         /* virtio config space */
> > +               struct virtio_console_config cons;
> > +               struct virtio_net_config net;
> > +       } config;
> 
> Describe, which field allows to distinguish what type of the data is in a union.
> 
> > +       struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> > +       u8 *tx_buf;                     /* tx buffer */
> > +       u32 tx_head;                    /* tx buffer head */
> > +       u32 tx_tail;                    /* tx buffer tail */
> > +};
> 
> kernel-doc?
> 
> > +/* Structure of the interrupt information. */
> > +struct mlxbf_tmfifo_irq_info {
> > +       struct mlxbf_tmfifo *fifo;      /* tmfifo structure */
> > +       int irq;                        /* interrupt number */
> > +       int index;                      /* array index */
> > +};
> 
> Ditto.
> 
> > +
> > +/* Structure of the TmFifo information. */
> > +struct mlxbf_tmfifo {
> > +       struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /* devices */
> > +       struct platform_device *pdev;   /* platform device */
> > +       struct mutex lock;              /* fifo lock */
> > +       void __iomem *rx_base;          /* mapped register base */
> > +       void __iomem *tx_base;          /* mapped register base */
> > +       int tx_fifo_size;               /* number of entries of the Tx FIFO */
> > +       int rx_fifo_size;               /* number of entries of the Rx FIFO */
> > +       unsigned long pend_events;      /* pending bits for deferred process */
> > +       struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info */
> > +       struct work_struct work;        /* work struct for deferred process */
> > +       struct timer_list timer;        /* keepalive timer */
> > +       struct mlxbf_tmfifo_vring *vring[2];    /* current Tx/Rx ring */
> > +       bool is_ready;                  /* ready flag */
> > +       spinlock_t spin_lock;           /* spin lock */
> > +};
> 
> Ditto.
> 
> > +/* Use a union struction for 64-bit little/big endian. */
> 
> What does this mean?
> 
> > +union mlxbf_tmfifo_data_64bit {
> > +       u64 data;
> > +       __le64 data_le;
> > +};
> > +
> > +/* Message header used to demux data in the TmFifo. */
> > +union mlxbf_tmfifo_msg_hdr {
> > +       struct {
> > +               u8 type;                /* message type */
> > +               __be16 len;             /* payload length */
> > +               u8 unused[5];           /* reserved, set to 0 */
> > +       } __packed;
> 
> It's already packed. No?
> 
> > +       union mlxbf_tmfifo_data_64bit u;        /* 64-bit data */
> > +};
> 
> > +/* MTU setting of the virtio-net interface. */
> > +#define MLXBF_TMFIFO_NET_MTU           1500
> 
> Don't we have this globally defined?
> 
> > +/* Supported virtio-net features. */
> > +#define MLXBF_TMFIFO_NET_FEATURES      ((1UL << VIRTIO_NET_F_MTU) | \
> > +                                        (1UL << VIRTIO_NET_F_STATUS) | \
> > +                                        (1UL << VIRTIO_NET_F_MAC))
> 
> BIT_UL() ?
> 
> > +/* Function declarations. */
> 
> Noise.
> 
> > +static int mlxbf_tmfifo_remove(struct platform_device *pdev);
> 
> Why do you need forward declaration for this?
> 
> > +/* Return the consumed Tx buffer space. */
> > +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev)
> > +{
> > +       return ((vdev->tx_tail >= vdev->tx_head) ?
> > +               (vdev->tx_tail - vdev->tx_head) :
> > +               (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> > +                vdev->tx_tail));
> 
> Split this for better reading.
> 
> > +}
> > +
> > +/* Return the available Tx buffer space. */
> > +static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev)
> > +{
> > +       return (MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE -
> > +               mlxbf_tmfifo_vdev_tx_buf_len(vdev));
> 
> Redundant parens.
> Moreover, you might consider temporary variable for better reading.
> 
> > +}
> > +
> > +/* Update Rx/Tx buffer index pointer. */
> > +static void mlxbf_tmfifo_vdev_tx_buf_index_inc(u32 *index, u32 len)
> > +{
> > +       *index += len;
> > +       if (*index >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> > +               *index -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
> > +}
> > +
> > +/* Allocate vrings for the fifo. */
> > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > +                                    struct mlxbf_tmfifo_vdev *tm_vdev,
> > +                                    int vdev_id)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       dma_addr_t dma;
> > +       int i, size;
> > +       void *va;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > +               vring = &tm_vdev->vrings[i];
> > +               vring->fifo = fifo;
> > +               vring->size = MLXBF_TMFIFO_VRING_SIZE;
> > +               vring->align = SMP_CACHE_BYTES;
> > +               vring->id = i;
> > +               vring->vdev_id = vdev_id;
> > +
> 
> > +               size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> 
> Why do you need this?
> dma_alloc_coherent() allocates memory on page granularity anyway.
> 
> > +               va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
> > +                                       GFP_KERNEL);
> > +               if (!va) {
> 
> > +                       dev_err(tm_vdev->vdev.dev.parent,
> 
> Would be much easy if you have temporary variable for this device.
> 
> > +                               "dma_alloc_coherent failed\n");
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               vring->va = va;
> > +               vring->dma = dma;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> > +/* Interrupt handler. */
> > +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> > +{
> > +       struct mlxbf_tmfifo_irq_info *irq_info;
> > +
> > +       irq_info = (struct mlxbf_tmfifo_irq_info *)arg;
> 
> Useless casting.
> Assignment can be done in definition block.
> 
> > +       if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> > +           !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> > +               schedule_work(&irq_info->fifo->work);
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +/* Get the next packet descriptor from the vring. */
> > +static struct vring_desc *mlxbf_tmfifo_get_next_desc(struct virtqueue *vq)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       unsigned int idx, head;
> > +       struct vring *vr;
> > +
> > +       vr = (struct vring *)virtqueue_get_vring(vq);
> 
> Return type is different? Is it safe to cast? Why?
> 
> > +       if (!vr)
> > +               return NULL;
> 
> + blank line
> 
> > +       vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> 
> Do you need explicit casting?
> 
> > +       if (vring->next_avail == virtio16_to_cpu(vq->vdev, vr->avail->idx))
> > +               return NULL;
> 
> +blank line
> 
> > +       idx = vring->next_avail % vr->num;
> > +       head = virtio16_to_cpu(vq->vdev, vr->avail->ring[idx]);
> > +       if (WARN_ON(head >= vr->num))
> > +               return NULL;
> > +       vring->next_avail++;
> > +
> > +       return &vr->desc[head];
> > +}
> > +
> > +/* Release virtio descriptor. */
> > +static void mlxbf_tmfifo_release_desc(struct virtio_device *vdev,
> > +                                     struct vring *vr, struct vring_desc *desc,
> > +                                     u32 len)
> > +{
> > +       u16 idx, vr_idx;
> > +
> > +       vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
> > +       idx = vr_idx % vr->num;
> > +       vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
> > +       vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> > +
> > +       /* Virtio could poll and check the 'idx' to decide
> > +        * whether the desc is done or not. Add a memory
> > +        * barrier here to make sure the update above completes
> > +        * before updating the idx.
> > +        */
> 
> Multi-line comment style is broken.
> 
> > +       mb();
> > +       vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
> > +}
> 
> > +/* House-keeping timer. */
> > +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> > +{
> > +       struct mlxbf_tmfifo *fifo;
> 
> > +       fifo = container_of(arg, struct mlxbf_tmfifo, timer);
> 
> Can't be done in the definition block?
> 
> > +       /*
> > +        * Wake up the work handler to poll the Rx FIFO in case interrupt
> > +        * missing or any leftover bytes stuck in the FIFO.
> > +        */
> > +       test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
> 
> How do you utilize test results?
> 
> > +
> > +       /*
> > +        * Wake up Tx handler in case virtio has queued too many packets
> > +        * and are waiting for buffer return.
> > +        */
> > +       test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> 
> Ditto.
> 
> > +
> > +       schedule_work(&fifo->work);
> > +
> > +       mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> > +}
> 
> > +       /* Adjust the size to available space. */
> > +       if (size + sizeof(hdr) > avail * sizeof(u64))
> > +               size = avail * sizeof(u64) - sizeof(hdr);
> 
> Can avail be 0?
> 
> > +       /* Write header. */
> > +       hdr.u.data = 0;
> > +       hdr.type = VIRTIO_ID_CONSOLE;
> > +       hdr.len = htons(size);
> > +       hdr.u.data_le = cpu_to_le64(hdr.u.data);
> 
> > +       writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> 
> So, this one is not protected anyhow? Potential race condition?
> 
> > +
> > +       spin_lock_irqsave(&fifo->spin_lock, flags);
> > +
> > +       while (size > 0) {
> > +               addr = cons->tx_buf + cons->tx_head;
> > +
> > +               if (cons->tx_head + sizeof(u64) <=
> > +                   MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> > +                       memcpy(&data, addr, sizeof(u64));
> > +               } else {
> > +                       partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> > +                       memcpy(&data, addr, partial);
> > +                       memcpy((u8 *)&data + partial, cons->tx_buf,
> > +                              sizeof(u64) - partial);
> > +               }
> > +               writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> > +
> > +               if (size >= sizeof(u64)) {
> > +                       mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> > +                                                          sizeof(u64));
> > +                       size -= sizeof(u64);
> > +               } else {
> > +                       mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> > +                                                          size);
> > +                       size = 0;
> > +               }
> > +       }
> > +
> > +       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> > +}
> 
> > +       /* Rx/Tx one word (8 bytes) if not done. */
> > +       if (vring->cur_len != len)
> > +               mlxbf_tmfifo_rxtx_word(fifo, vdev, vring, desc, is_rx, avail,
> > +                                      len);
> 
> In such case better to keep it in one line.
> 
> > +/* Get the array of feature bits for this device. */
> > +static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +
> > +       tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> > +       return tm_vdev->features;
> > +}
> > +
> > +/* Confirm device features to use. */
> > +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +
> 
> > +       tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> 
> This is candidate to be a macro
> 
> #define mlxbt_vdev_to_tmfifo(...) ...
> 
> > +       tm_vdev->features = vdev->features;
> > +
> > +       return 0;
> > +}
> 
> > +/* Create vdev type in a tmfifo. */
> > +static int mlxbf_tmfifo_create_vdev(struct device *dev,
> > +                                   struct mlxbf_tmfifo *fifo,
> > +                                   int vdev_id, u64 features,
> > +                                   void *config, u32 size)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +       int ret = 0;
> > +
> > +       mutex_lock(&fifo->lock);
> > +
> > +       tm_vdev = fifo->vdev[vdev_id];
> > +       if (tm_vdev) {
> > +               dev_err(dev, "vdev %d already exists\n", vdev_id);
> > +               ret = -EEXIST;
> > +               goto fail;
> > +       }
> > +
> > +       tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
> > +       if (!tm_vdev) {
> > +               ret = -ENOMEM;
> > +               goto fail;
> > +       }
> > +
> > +       tm_vdev->vdev.id.device = vdev_id;
> > +       tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> > +       tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> > +       tm_vdev->features = features;
> > +       if (config)
> > +               memcpy(&tm_vdev->config, config, size);
> > +       if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> > +               dev_err(dev, "unable to allocate vring\n");
> > +               ret = -ENOMEM;
> > +               goto fail;
> > +       }
> > +       if (vdev_id == VIRTIO_ID_CONSOLE)
> 
> > +               tm_vdev->tx_buf = devm_kmalloc(dev,
> > +                                              MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> > +                                              GFP_KERNEL);
> 
> Are you sure devm_ suits here?
> 
> > +       fifo->vdev[vdev_id] = tm_vdev;
> > +
> > +       /* Register the virtio device. */
> > +       ret = register_virtio_device(&tm_vdev->vdev);
> > +       if (ret) {
> > +               dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
> > +               goto register_fail;
> > +       }
> > +
> > +       mutex_unlock(&fifo->lock);
> > +       return 0;
> > +
> > +register_fail:
> > +       mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> > +       fifo->vdev[vdev_id] = NULL;
> > +fail:
> > +       mutex_unlock(&fifo->lock);
> > +       return ret;
> > +}
> 
> > +/* Read the configured network MAC address from efi variable. */
> > +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> > +{
> > +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> > +       efi_status_t status;
> > +       unsigned long size;
> > +       u8 buf[6];
> > +
> > +       size = sizeof(buf);
> > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > +                                 buf);
> > +       if (status == EFI_SUCCESS && size == sizeof(buf))
> > +               memcpy(mac, buf, sizeof(buf));
> > +}
> 
> Shouldn't be rather helper in EFI lib in kernel?
> 
> > +/* Probe the TMFIFO. */
> > +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> > +{
> > +       struct virtio_net_config net_config;
> > +       struct resource *rx_res, *tx_res;
> > +       struct mlxbf_tmfifo *fifo;
> > +       int i, ret;
> > +
> > +       /* Get the resource of the Rx FIFO. */
> > +       rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +       if (!rx_res)
> > +               return -ENODEV;
> > +
> > +       /* Get the resource of the Tx FIFO. */
> > +       tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +       if (!tx_res)
> > +               return -ENODEV;
> > +
> > +       if (!devm_request_mem_region(&pdev->dev, rx_res->start,
> > +                                    resource_size(rx_res), "bf-tmfifo"))
> > +               return -EBUSY;
> > +
> > +       if (!devm_request_mem_region(&pdev->dev, tx_res->start,
> > +                                    resource_size(tx_res), "bf-tmfifo"))
> > +               return -EBUSY;
> > +
> > +       fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
> > +       if (!fifo)
> > +               return -ENOMEM;
> > +
> > +       fifo->pdev = pdev;
> > +       platform_set_drvdata(pdev, fifo);
> > +
> > +       spin_lock_init(&fifo->spin_lock);
> > +       INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> > +
> > +       timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> > +
> > +       for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> > +               fifo->irq_info[i].index = i;
> > +               fifo->irq_info[i].fifo = fifo;
> 
> > +               fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> 
> 
> > +               ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
> > +                                      mlxbf_tmfifo_irq_handler, 0,
> > +                                      "tmfifo", &fifo->irq_info[i]);
> > +               if (ret) {
> > +                       dev_err(&pdev->dev, "devm_request_irq failed\n");
> > +                       fifo->irq_info[i].irq = 0;
> > +                       return ret;
> > +               }
> > +       }
> > +
> 
> > +       fifo->rx_base = devm_ioremap(&pdev->dev, rx_res->start,
> > +                                    resource_size(rx_res));
> > +       if (!fifo->rx_base)
> > +               return -ENOMEM;
> > +
> > +       fifo->tx_base = devm_ioremap(&pdev->dev, tx_res->start,
> > +                                    resource_size(tx_res));
> > +       if (!fifo->tx_base)
> > +               return -ENOMEM;
> 
> Switch to devm_ioremap_resource().
> However, I think you probably need memremap().
> 
> > +       mutex_init(&fifo->lock);
> 
> Isn't too late for initializing this one?
> 
> 
> > +/* Device remove function. */
> > +static int mlxbf_tmfifo_remove(struct platform_device *pdev)
> > +{
> > +       struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> > +
> 
> > +       if (fifo)
> 
> How is it possible to be not true?
> 
> > +               mlxbf_tmfifo_cleanup(fifo);
> > +
> 
> > +       platform_set_drvdata(pdev, NULL);
> 
> Redundant.
> 
> > +
> > +       return 0;
> > +}
> 
> > +MODULE_LICENSE("GPL");
> 
> Is it correct?
> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-02-13 18:11   ` Andy Shevchenko
  2019-02-13 18:34     ` Liming Sun
@ 2019-02-14 16:25     ` Liming Sun
  2019-02-28 15:51     ` Liming Sun
  2 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-02-14 16:25 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy. Please see my response and questions on some of the comments below.

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Wednesday, February 13, 2019 1:11 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Wed, Feb 13, 2019 at 3:27 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> ...
> 
> > +/* Use a union struction for 64-bit little/big endian. */
> 
> What does this mean?
> 
> > +union mlxbf_tmfifo_data_64bit {
> > +       u64 data;
> > +       __le64 data_le;
> > +};

The purpose is to send 8 bytes into the FIFO without data casting in writeq().

Below is the example with the cast.

u64 data = 0x1234;
__le64 data_le;
data_le = cpu_to_le64(data)
writeq(*(u64 *)&data_le, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);

Below is the alternative trying to use union to avoid the cast.

mlxbf_tmfifo_data_64bit u;
u.data = 0x1234;
u. data_le = cpu_to_le64(u.data);
writeq(u.data_le, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);

Which way might be better or any other suggestions?

> > +
> > +/* Message header used to demux data in the TmFifo. */
> > +union mlxbf_tmfifo_msg_hdr {
> > +       struct {
> > +               u8 type;                /* message type */
> > +               __be16 len;             /* payload length */
> > +               u8 unused[5];           /* reserved, set to 0 */
> > +       } __packed;
> 
> It's already packed. No?

The '__packed' attribute is needed here. Without it the compiler will make the
structure size exceed 8 bytes, which is not desired.

>...
> > +       if (vdev_id == VIRTIO_ID_CONSOLE)
> 
> > +               tm_vdev->tx_buf = devm_kmalloc(dev,
> > +                                              MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> > +                                              GFP_KERNEL);
> 
> Are you sure devm_ suits here?

The 'tx_buf' is a normal buffer used to hold the console output.
It seems OK to use devm_kmalloc() so it could be automatically freed
on driver detach. Please correct me if I am wrong.

>>...
> > +
> > +       fifo->tx_base = devm_ioremap(&pdev->dev, tx_res->start,
> > +                                    resource_size(tx_res));
> > +       if (!fifo->tx_base)
> > +               return -ENOMEM;
> 
> Switch to devm_ioremap_resource().
> However, I think you probably need memremap().

These are device registers accessed by the arm64 core.
In arm64/include/asm/io.h, several APIs are defined the same way.

#define ioremap(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_nocache(addr, size)	__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
#define ioremap_wt(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))

How about using devm_ioremap_nocache()?
It could take advantage of the devm_xx() API.

>...
> Is it correct?
> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver
  2019-01-21 19:17   ` Liming Sun
@ 2019-02-18 13:24     ` Arnd Bergmann
  -1 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2019-02-18 13:24 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Mon, Jan 21, 2019 at 8:18 PM Liming Sun <lsun@mellanox.com> wrote:
>
> Liming Sun (9):
>   soc: Add TmFifo driver for Mellanox BlueField Soc
>   arm64: Add Mellanox BlueField SoC config option
>   dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
>   MAINTAINERS: Add entry for Mellanox Bluefield Soc

Could you send the arch/arm64 changes to soc@kernel.org for
inclusion? I'm not sure what the status of the rshim driver is now,
but we should probably merge the base support through the soc
tree.

     Arnd

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver
@ 2019-02-18 13:24     ` Arnd Bergmann
  0 siblings, 0 replies; 179+ messages in thread
From: Arnd Bergmann @ 2019-02-18 13:24 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Mon, Jan 21, 2019 at 8:18 PM Liming Sun <lsun@mellanox.com> wrote:
>
> Liming Sun (9):
>   soc: Add TmFifo driver for Mellanox BlueField Soc
>   arm64: Add Mellanox BlueField SoC config option
>   dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
>   MAINTAINERS: Add entry for Mellanox Bluefield Soc

Could you send the arch/arm64 changes to soc@kernel.org for
inclusion? I'm not sure what the status of the rshim driver is now,
but we should probably merge the base support through the soc
tree.

     Arnd

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-02-13 18:11   ` Andy Shevchenko
  2019-02-13 18:34     ` Liming Sun
  2019-02-14 16:25     ` Liming Sun
@ 2019-02-28 15:51     ` Liming Sun
  2 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-02-28 15:51 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy for the comments. Please see the responses below.
I'll also post the v10 patch after this email.

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Wednesday, February 13, 2019 1:11 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Wed, Feb 13, 2019 at 3:27 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
> 
> Thanks for an update, my comments below.
> 
> Again, to Mellanox: guys, please, establish internal mailing list for
> review and don't come with such quality of code.

Yes, the patch went through internal review. I updated the 
Reviewed-by section of the commit message.

> 
> Next time I would like to see Reviewed-by from Mellanox people I know,
> like Vadim or Leon.
> 
> > +config MLXBF_TMFIFO
> > +       tristate "Mellanox BlueField SoC TmFifo platform driver"
> 
> > +       depends on ARM64 && ACPI && VIRTIO_CONSOLE && VIRTIO_NET
> 
> Split this to three logical parts.

Updated in v10.

> 
> > +       help
> > +         Say y here to enable TmFifo support. The TmFifo driver provides
> > +          platform driver support for the TmFifo which supports console
> > +          and networking based on the virtio framework.
> 
> >  obj-$(CONFIG_MLXREG_HOTPLUG)   += mlxreg-hotplug.o
> >  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> > +obj-$(CONFIG_MLXBF_TMFIFO)     += mlxbf-tmfifo.o
> 
> I would suggest to keep it sorted.

Updated in v10.

> 
> > +#define MLXBF_TMFIFO_TX_DATA 0x0
> 
> I suggest to use same fixed format for offsets.
> Here, for example, 0x00 would be better.
> 
> > +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK  0x1ff
> 
> #include <linux/bits.h>
> ...
> GENMASK()

Updated in v10.

> 
> > +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK  0xff
> 
> > +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK  0xff00
> 
> > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> 
> GENMASK() / GENMASK_ULL()

Updated in v10.

> 
> > +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK  0x1ff
> 
> GENMASK()

Updated in v10.

> 
> > +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK  0xff
> 
> > +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK 0xff
> > +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK  0xff00
> 
> > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> 
> Ditto.

Updated in v10.

> 
> > +#include <linux/acpi.h>
> > +#include <linux/byteorder/generic.h>
> > +#include <linux/bitfield.h>
> > +#include <linux/cache.h>
> > +#include <linux/device.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/efi.h>
> > +#include <linux/io.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/irq.h>
> > +#include <linux/kernel.h>
> > +#include <linux/math64.h>
> > +#include <linux/module.h>
> > +#include <linux/moduleparam.h>
> > +#include <linux/mutex.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/resource.h>
> > +#include <linux/slab.h>
> > +#include <linux/types.h>
> > +#include <linux/version.h>
> > +#include <linux/virtio.h>
> > +#include <linux/virtio_config.h>
> > +#include <linux/virtio_console.h>
> > +#include <linux/virtio_ids.h>
> > +#include <linux/virtio_net.h>
> > +#include <linux/virtio_ring.h>
> 
> Do you need all of them?

Cleaned up quite a few and updated in v10.

> 
> > +#define MLXBF_TMFIFO_VRING_SIZE                        1024
> 
> SZ_1K ?

Updated in v10.

> 
> > +/* Console Tx buffer size. */
> > +#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE          (32 * 1024)
> 
> SZ_32K ?

Updated in v10.

> 
> > +/* House-keeping timer interval. */
> > +static int mlxbf_tmfifo_timer_interval = HZ / 10;
> 
> > +/* Global lock. */
> 
> Noise. Either explain what it protects, or remove.

Removed in v10.

> 
> > +static DEFINE_MUTEX(mlxbf_tmfifo_lock);
> 
> > +/* Struct declaration. */
> 
> Noise.

Removed in v10.

> 
> > +/* Structure to maintain the ring state. */
> > +struct mlxbf_tmfifo_vring {
> > +       void *va;                       /* virtual address */
> > +       dma_addr_t dma;                 /* dma address */
> > +       struct virtqueue *vq;           /* virtqueue pointer */
> > +       struct vring_desc *desc;        /* current desc */
> > +       struct vring_desc *desc_head;   /* current desc head */
> > +       int cur_len;                    /* processed len in current desc */
> > +       int rem_len;                    /* remaining length to be processed */
> > +       int size;                       /* vring size */
> > +       int align;                      /* vring alignment */
> > +       int id;                         /* vring id */
> > +       int vdev_id;                    /* TMFIFO_VDEV_xxx */
> > +       u32 pkt_len;                    /* packet total length */
> > +       u16 next_avail;                 /* next avail desc id */
> > +       struct mlxbf_tmfifo *fifo;      /* pointer back to the tmfifo */
> > +};
> 
> Perhaps kernel-doc?

Updated in v10.

> 
> > +/* Interrupt types. */
> > +enum {
> > +       MLXBF_TM_RX_LWM_IRQ,            /* Rx low water mark irq */
> > +       MLXBF_TM_RX_HWM_IRQ,            /* Rx high water mark irq */
> > +       MLXBF_TM_TX_LWM_IRQ,            /* Tx low water mark irq */
> > +       MLXBF_TM_TX_HWM_IRQ,            /* Tx high water mark irq */
> > +       MLXBF_TM_IRQ_CNT
> 
> CNT...
> 
> > +};
> > +
> > +/* Ring types (Rx & Tx). */
> > +enum {
> > +       MLXBF_TMFIFO_VRING_RX,          /* Rx ring */
> > +       MLXBF_TMFIFO_VRING_TX,          /* Tx ring */
> > +       MLXBF_TMFIFO_VRING_NUM
> 
> ...NUM
> 
> Perhaps one style for max numbers?

Updated in v10.

> 
> > +};
> 
> > +
> > +/* Structure for the virtual device. */
> > +struct mlxbf_tmfifo_vdev {
> > +       struct virtio_device vdev;      /* virtual device */
> > +       u8 status;
> > +       u64 features;
> > +       union {                         /* virtio config space */
> > +               struct virtio_console_config cons;
> > +               struct virtio_net_config net;
> > +       } config;
> 
> Describe, which field allows to distinguish what type of the data is in a union.

Added comments in v10.

> 
> > +       struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_NUM];
> > +       u8 *tx_buf;                     /* tx buffer */
> > +       u32 tx_head;                    /* tx buffer head */
> > +       u32 tx_tail;                    /* tx buffer tail */
> > +};
> 
> kernel-doc?

Updated in v10

> 
> > +/* Structure of the interrupt information. */
> > +struct mlxbf_tmfifo_irq_info {
> > +       struct mlxbf_tmfifo *fifo;      /* tmfifo structure */
> > +       int irq;                        /* interrupt number */
> > +       int index;                      /* array index */
> > +};
> 
> Ditto.

Updated in v10

> 
> > +
> > +/* Structure of the TmFifo information. */
> > +struct mlxbf_tmfifo {
> > +       struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX]; /* devices */
> > +       struct platform_device *pdev;   /* platform device */
> > +       struct mutex lock;              /* fifo lock */
> > +       void __iomem *rx_base;          /* mapped register base */
> > +       void __iomem *tx_base;          /* mapped register base */
> > +       int tx_fifo_size;               /* number of entries of the Tx FIFO */
> > +       int rx_fifo_size;               /* number of entries of the Rx FIFO */
> > +       unsigned long pend_events;      /* pending bits for deferred process */
> > +       struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_IRQ_CNT]; /* irq info */
> > +       struct work_struct work;        /* work struct for deferred process */
> > +       struct timer_list timer;        /* keepalive timer */
> > +       struct mlxbf_tmfifo_vring *vring[2];    /* current Tx/Rx ring */
> > +       bool is_ready;                  /* ready flag */
> > +       spinlock_t spin_lock;           /* spin lock */
> > +};
> 
> Ditto.

Updated in v10

> 
> > +/* Use a union struction for 64-bit little/big endian. */
> 
> What does this mean?

Updated in v10 with the following comments to explain it.
/*
 * It's expected to send 64-bit little-endian value (__le64) into the TmFifo.
 * readq() and writeq() expect u64 instead. A union structure is used here
 * to workaround the explicit casting usage like writeq(*(u64 *)&data_le).
 */

> 
> > +union mlxbf_tmfifo_data_64bit {
> > +       u64 data;
> > +       __le64 data_le;
> > +};
> > +
> > +/* Message header used to demux data in the TmFifo. */
> > +union mlxbf_tmfifo_msg_hdr {
> > +       struct {
> > +               u8 type;                /* message type */
> > +               __be16 len;             /* payload length */
> > +               u8 unused[5];           /* reserved, set to 0 */
> > +       } __packed;
> 
> It's already packed. No?

It's not packed by default due to the 16-bit len. We need the '__packed'
to make sure the size of the structure is 8 bytes.

> 
> > +       union mlxbf_tmfifo_data_64bit u;        /* 64-bit data */
> > +};
> 
> > +/* MTU setting of the virtio-net interface. */
> > +#define MLXBF_TMFIFO_NET_MTU           1500
> 
> Don't we have this globally defined?

Updated in v10

> 
> > +/* Supported virtio-net features. */
> > +#define MLXBF_TMFIFO_NET_FEATURES      ((1UL << VIRTIO_NET_F_MTU) | \
> > +                                        (1UL << VIRTIO_NET_F_STATUS) | \
> > +                                        (1UL << VIRTIO_NET_F_MAC))
> 
> BIT_UL() ?

Updated in v10

> 
> > +/* Function declarations. */
> 
> Noise.

Removed in v10

> 
> > +static int mlxbf_tmfifo_remove(struct platform_device *pdev);
> 
> Why do you need forward declaration for this?

Removed in v10

> 
> > +/* Return the consumed Tx buffer space. */
> > +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *vdev)
> > +{
> > +       return ((vdev->tx_tail >= vdev->tx_head) ?
> > +               (vdev->tx_tail - vdev->tx_head) :
> > +               (MLXBF_TMFIFO_CONS_TX_BUF_SIZE - vdev->tx_head +
> > +                vdev->tx_tail));
> 
> Split this for better reading.

Updated in v10

> 
> > +}
> > +
> > +/* Return the available Tx buffer space. */
> > +static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *vdev)
> > +{
> > +       return (MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE -
> > +               mlxbf_tmfifo_vdev_tx_buf_len(vdev));
> 
> Redundant parens.
> Moreover, you might consider temporary variable for better reading.

Updated in v10

> 
> > +}
> > +
> > +/* Update Rx/Tx buffer index pointer. */
> > +static void mlxbf_tmfifo_vdev_tx_buf_index_inc(u32 *index, u32 len)
> > +{
> > +       *index += len;
> > +       if (*index >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
> > +               *index -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
> > +}
> > +
> > +/* Allocate vrings for the fifo. */
> > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > +                                    struct mlxbf_tmfifo_vdev *tm_vdev,
> > +                                    int vdev_id)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       dma_addr_t dma;
> > +       int i, size;
> > +       void *va;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > +               vring = &tm_vdev->vrings[i];
> > +               vring->fifo = fifo;
> > +               vring->size = MLXBF_TMFIFO_VRING_SIZE;
> > +               vring->align = SMP_CACHE_BYTES;
> > +               vring->id = i;
> > +               vring->vdev_id = vdev_id;
> > +
> 
> > +               size = PAGE_ALIGN(vring_size(vring->size, vring->align));
> 
> Why do you need this?
> dma_alloc_coherent() allocates memory on page granularity anyway.

Updated in v10

> 
> > +               va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
> > +                                       GFP_KERNEL);
> > +               if (!va) {
> 
> > +                       dev_err(tm_vdev->vdev.dev.parent,
> 
> Would be much easy if you have temporary variable for this device.

Updated in v10

> 
> > +                               "dma_alloc_coherent failed\n");
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               vring->va = va;
> > +               vring->dma = dma;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> > +/* Interrupt handler. */
> > +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> > +{
> > +       struct mlxbf_tmfifo_irq_info *irq_info;
> > +
> > +       irq_info = (struct mlxbf_tmfifo_irq_info *)arg;
> 
> Useless casting.
> Assignment can be done in definition block.

Updated in v10

> 
> > +       if (irq_info->index < MLXBF_TM_IRQ_CNT &&
> > +           !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> > +               schedule_work(&irq_info->fifo->work);
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> > +/* Get the next packet descriptor from the vring. */
> > +static struct vring_desc *mlxbf_tmfifo_get_next_desc(struct virtqueue *vq)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       unsigned int idx, head;
> > +       struct vring *vr;
> > +
> > +       vr = (struct vring *)virtqueue_get_vring(vq);
> 
> Return type is different? Is it safe to cast? Why?

It's 'const' casting. Fixed in v10 to use 'const struct vring *vr' instead.

> 
> > +       if (!vr)
> > +               return NULL;
> 
> + blank line

Updated in v10

> 
> > +       vring = (struct mlxbf_tmfifo_vring *)vq->priv;
> 
> Do you need explicit casting?

Updated in v10

> 
> > +       if (vring->next_avail == virtio16_to_cpu(vq->vdev, vr->avail->idx))
> > +               return NULL;
> 
> +blank line

Updated in v10

> 
> > +       idx = vring->next_avail % vr->num;
> > +       head = virtio16_to_cpu(vq->vdev, vr->avail->ring[idx]);
> > +       if (WARN_ON(head >= vr->num))
> > +               return NULL;
> > +       vring->next_avail++;
> > +
> > +       return &vr->desc[head];
> > +}
> > +
> > +/* Release virtio descriptor. */
> > +static void mlxbf_tmfifo_release_desc(struct virtio_device *vdev,
> > +                                     struct vring *vr, struct vring_desc *desc,
> > +                                     u32 len)
> > +{
> > +       u16 idx, vr_idx;
> > +
> > +       vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
> > +       idx = vr_idx % vr->num;
> > +       vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
> > +       vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> > +
> > +       /* Virtio could poll and check the 'idx' to decide
> > +        * whether the desc is done or not. Add a memory
> > +        * barrier here to make sure the update above completes
> > +        * before updating the idx.
> > +        */
> 
> Multi-line comment style is broken.

Updated in v10

> 
> > +       mb();
> > +       vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
> > +}
> 
> > +/* House-keeping timer. */
> > +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> > +{
> > +       struct mlxbf_tmfifo *fifo;
> 
> > +       fifo = container_of(arg, struct mlxbf_tmfifo, timer);
> 
> Can't be done in the definition block?

Updated in v10

> 
> > +       /*
> > +        * Wake up the work handler to poll the Rx FIFO in case interrupt
> > +        * missing or any leftover bytes stuck in the FIFO.
> > +        */
> > +       test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
> 
> How do you utilize test results?

Fixed in v10

> 
> > +
> > +       /*
> > +        * Wake up Tx handler in case virtio has queued too many packets
> > +        * and are waiting for buffer return.
> > +        */
> > +       test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> 
> Ditto.

Fixed in v10

> 
> > +
> > +       schedule_work(&fifo->work);
> > +
> > +       mod_timer(&fifo->timer, jiffies + mlxbf_tmfifo_timer_interval);
> > +}
> 
> > +       /* Adjust the size to available space. */
> > +       if (size + sizeof(hdr) > avail * sizeof(u64))
> > +               size = avail * sizeof(u64) - sizeof(hdr);
> 
> Can avail be 0?

It won't be 0. There is a check at the beginning of this function.
The function will return if avail is too small.

> 
> > +       /* Write header. */
> > +       hdr.u.data = 0;
> > +       hdr.type = VIRTIO_ID_CONSOLE;
> > +       hdr.len = htons(size);
> > +       hdr.u.data_le = cpu_to_le64(hdr.u.data);
> 
> > +       writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> 
> So, this one is not protected anyhow? Potential race condition?

The spin-lock is to protect reference to the ‘tx_buf’, not the read/write of the fifo.
The fifo read/write is protected by mutex. Added a comment in v10 to avoid such
confusion.
> 
> > +
> > +       spin_lock_irqsave(&fifo->spin_lock, flags);
> > +
> > +       while (size > 0) {
> > +               addr = cons->tx_buf + cons->tx_head;
> > +
> > +               if (cons->tx_head + sizeof(u64) <=
> > +                   MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> > +                       memcpy(&data, addr, sizeof(u64));
> > +               } else {
> > +                       partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> > +                       memcpy(&data, addr, partial);
> > +                       memcpy((u8 *)&data + partial, cons->tx_buf,
> > +                              sizeof(u64) - partial);
> > +               }
> > +               writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> > +
> > +               if (size >= sizeof(u64)) {
> > +                       mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> > +                                                          sizeof(u64));
> > +                       size -= sizeof(u64);
> > +               } else {
> > +                       mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
> > +                                                          size);
> > +                       size = 0;
> > +               }
> > +       }
> > +
> > +       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> > +}
> 
> > +       /* Rx/Tx one word (8 bytes) if not done. */
> > +       if (vring->cur_len != len)
> > +               mlxbf_tmfifo_rxtx_word(fifo, vdev, vring, desc, is_rx, avail,
> > +                                      len);
> 
> In such case better to keep it in one line.

Updated in v10

> 
> > +/* Get the array of feature bits for this device. */
> > +static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +
> > +       tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> > +       return tm_vdev->features;
> > +}
> > +
> > +/* Confirm device features to use. */
> > +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +
> 
> > +       tm_vdev = container_of(vdev, struct mlxbf_tmfifo_vdev, vdev);
> 
> This is candidate to be a macro
> 
> #define mlxbt_vdev_to_tmfifo(...) ...

Updated in v10

> 
> > +       tm_vdev->features = vdev->features;
> > +
> > +       return 0;
> > +}
> 
> > +/* Create vdev type in a tmfifo. */
> > +static int mlxbf_tmfifo_create_vdev(struct device *dev,
> > +                                   struct mlxbf_tmfifo *fifo,
> > +                                   int vdev_id, u64 features,
> > +                                   void *config, u32 size)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +       int ret = 0;
> > +
> > +       mutex_lock(&fifo->lock);
> > +
> > +       tm_vdev = fifo->vdev[vdev_id];
> > +       if (tm_vdev) {
> > +               dev_err(dev, "vdev %d already exists\n", vdev_id);
> > +               ret = -EEXIST;
> > +               goto fail;
> > +       }
> > +
> > +       tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
> > +       if (!tm_vdev) {
> > +               ret = -ENOMEM;
> > +               goto fail;
> > +       }
> > +
> > +       tm_vdev->vdev.id.device = vdev_id;
> > +       tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> > +       tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
> > +       tm_vdev->features = features;
> > +       if (config)
> > +               memcpy(&tm_vdev->config, config, size);
> > +       if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
> > +               dev_err(dev, "unable to allocate vring\n");
> > +               ret = -ENOMEM;
> > +               goto fail;
> > +       }
> > +       if (vdev_id == VIRTIO_ID_CONSOLE)
> 
> > +               tm_vdev->tx_buf = devm_kmalloc(dev,
> > +                                              MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
> > +                                              GFP_KERNEL);
> 
> Are you sure devm_ suits here?

I think it's ok. The tx_buf is normal memory for output buffer.
It's running on SoC and the TmFifo is always there. So it's 
allocated at init and supposed to be released on module remove.

> 
> > +       fifo->vdev[vdev_id] = tm_vdev;
> > +
> > +       /* Register the virtio device. */
> > +       ret = register_virtio_device(&tm_vdev->vdev);
> > +       if (ret) {
> > +               dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
> > +               goto register_fail;
> > +       }
> > +
> > +       mutex_unlock(&fifo->lock);
> > +       return 0;
> > +
> > +register_fail:
> > +       mlxbf_tmfifo_free_vrings(fifo, vdev_id);
> > +       fifo->vdev[vdev_id] = NULL;
> > +fail:
> > +       mutex_unlock(&fifo->lock);
> > +       return ret;
> > +}
> 
> > +/* Read the configured network MAC address from efi variable. */
> > +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> > +{
> > +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> > +       efi_status_t status;
> > +       unsigned long size;
> > +       u8 buf[6];
> > +
> > +       size = sizeof(buf);
> > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > +                                 buf);
> > +       if (status == EFI_SUCCESS && size == sizeof(buf))
> > +               memcpy(mac, buf, sizeof(buf));
> > +}
> 
> Shouldn't be rather helper in EFI lib in kernel?

It's a little strange that there seems no such existing lib function. I searched
a little bit in kernel tree like below, they seem using the efi.get_variable()
approach.
arch/x86/kernel/ima_arch.c
drivers/scsi/isci/probe_roms.c
security/integrity/platform_certs/load_uefi.c

> 
> > +/* Probe the TMFIFO. */
> > +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> > +{
> > +       struct virtio_net_config net_config;
> > +       struct resource *rx_res, *tx_res;
> > +       struct mlxbf_tmfifo *fifo;
> > +       int i, ret;
> > +
> > +       /* Get the resource of the Rx FIFO. */
> > +       rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +       if (!rx_res)
> > +               return -ENODEV;
> > +
> > +       /* Get the resource of the Tx FIFO. */
> > +       tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +       if (!tx_res)
> > +               return -ENODEV;
> > +
> > +       if (!devm_request_mem_region(&pdev->dev, rx_res->start,
> > +                                    resource_size(rx_res), "bf-tmfifo"))
> > +               return -EBUSY;
> > +
> > +       if (!devm_request_mem_region(&pdev->dev, tx_res->start,
> > +                                    resource_size(tx_res), "bf-tmfifo"))
> > +               return -EBUSY;
> > +
> > +       fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
> > +       if (!fifo)
> > +               return -ENOMEM;
> > +
> > +       fifo->pdev = pdev;
> > +       platform_set_drvdata(pdev, fifo);
> > +
> > +       spin_lock_init(&fifo->spin_lock);
> > +       INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> > +
> > +       timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> > +
> > +       for (i = 0; i < MLXBF_TM_IRQ_CNT; i++) {
> > +               fifo->irq_info[i].index = i;
> > +               fifo->irq_info[i].fifo = fifo;
> 
> > +               fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> 
> 
> > +               ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
> > +                                      mlxbf_tmfifo_irq_handler, 0,
> > +                                      "tmfifo", &fifo->irq_info[i]);
> > +               if (ret) {
> > +                       dev_err(&pdev->dev, "devm_request_irq failed\n");
> > +                       fifo->irq_info[i].irq = 0;
> > +                       return ret;
> > +               }
> > +       }
> > +
> 
> > +       fifo->rx_base = devm_ioremap(&pdev->dev, rx_res->start,
> > +                                    resource_size(rx_res));
> > +       if (!fifo->rx_base)
> > +               return -ENOMEM;
> > +
> > +       fifo->tx_base = devm_ioremap(&pdev->dev, tx_res->start,
> > +                                    resource_size(tx_res));
> > +       if (!fifo->tx_base)
> > +               return -ENOMEM;
> 
> Switch to devm_ioremap_resource().
> However, I think you probably need memremap().

Updated in v10 to use devm_ioremap_resource().

The map is just for several registers which is not meant to be
cache-able. Probably devm_ioremap_nocache() might make
more sense? I checked arm64/include/asm/io.h, looks like
ioremap/ioremap_nocache/ioremap_wt are defined the same
thing.

> 
> > +       mutex_init(&fifo->lock);
> 
> Isn't too late for initializing this one?

It won't cause problem here due to the 'is_ready'
flag, but definitely better to move it ahead. Updated in v10.

> 
> 
> > +/* Device remove function. */
> > +static int mlxbf_tmfifo_remove(struct platform_device *pdev)
> > +{
> > +       struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> > +
> 
> > +       if (fifo)
> 
> How is it possible to be not true?

Updated in v10. Removed.

> 
> > +               mlxbf_tmfifo_cleanup(fifo);
> > +
> 
> > +       platform_set_drvdata(pdev, NULL);
> 
> Redundant.

Updated in v10. Removed.

> 
> > +
> > +       return 0;
> > +}
> 
> > +MODULE_LICENSE("GPL");
> 
> Is it correct?

Fixed in v10 and updated to MODULE_LICENSE("GPL v2");

> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (45 preceding siblings ...)
  (?)
@ 2019-02-28 15:51 ` Liming Sun
  2019-03-05 15:34   ` Andy Shevchenko
  -1 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-02-28 15:51 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/32*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change data type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don’t use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on  VIRTIO_ in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1342 +++++++++++++++++++++++++
 4 files changed, 1417 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+          platform driver support for the TmFifo which supports console
+          and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..4b2bd29
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..a6626ffe1
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1342 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CONS_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer size with some reservation. */
+#define MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE	\
+	(MLXBF_TMFIFO_CONS_TX_BUF_SIZE - 8)
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_head: head of the tx_buf
+ * @tx_tail: tail of the tx_buf
+ * @tx_buf: output ring buffer
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	u32 tx_head;
+	u32 tx_tail;
+	u8 *tx_buf;
+};
+
+/**
+ * mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @pdev: platform device
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx fifo
+ * @tx_base: mapped register base address for the Tx fifo
+ * @rx_fifo_size: number of entries of the Rx fifo
+ * @tx_fifo_size: number of entries of the Tx fifo
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: Tx/Rx ring
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct platform_device *pdev;
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * union mlxbf_tmfifo_u64 - Union of 64-bit data
+ * @data: 64-bit data in host byte order
+ * @data_le: 64-bit data in little-endian byte order
+ *
+ * It's expected to send 64-bit little-endian value (__le64) into the TmFifo.
+ * readq() and writeq() expect u64 instead. A union structure is used here
+ * to workaround the explicit casting usage like writeq(*(u64 *)&data_le).
+ */
+union mlxbf_tmfifo_u64 {
+	u64 data;
+	__le64 data_le;
+};
+
+/**
+ * union mlxbf_tmfifo_msg_hdr - Union of the TmFifo message header
+ * @type: message type (a VIRTIO_ID_xxx value identifying the source device)
+ * @len: payload length in network byte order
+ * @unused: reserved bytes; pad the header to exactly one 64-bit FIFO word
+ * @u: 64-bit view of the header, written to the FIFO as a single word
+ */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;
+		__be16 len;
+		u8 unused[5];
+	} __packed;
+	union mlxbf_tmfifo_u64 u;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[6] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	(BIT_ULL(VIRTIO_NET_F_MTU) | \
+					 BIT_ULL(VIRTIO_NET_F_STATUS) | \
+					 BIT_ULL(VIRTIO_NET_F_MAC))
+
+#define mlxbf_vdev_to_tmfifo(dev)	\
+	container_of(dev, struct mlxbf_tmfifo_vdev, vdev)
+
+/* Console output are buffered and can be accessed with the functions below. */
+
+/* Return the number of bytes currently held in the console Tx ring buffer. */
+static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	u32 used = tm_vdev->tx_tail - tm_vdev->tx_head;
+
+	/* The indices wrap around the circular buffer; adjust on wrap. */
+	if (tm_vdev->tx_tail < tm_vdev->tx_head)
+		used += MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
+
+	return used;
+}
+
+/* Return how many bytes may still be queued into the console Tx buffer. */
+static int mlxbf_tmfifo_vdev_tx_buf_avail(struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	return MLXBF_TMFIFO_CONS_TX_BUF_RSV_SIZE -
+	       mlxbf_tmfifo_vdev_tx_buf_len(tm_vdev);
+}
+
+/* Advance a console buffer index by @len, wrapping at the buffer size. */
+static void mlxbf_tmfifo_vdev_tx_buf_index_inc(u32 *index, u32 len)
+{
+	u32 next = *index + len;
+
+	if (next >= MLXBF_TMFIFO_CONS_TX_BUF_SIZE)
+		next -= MLXBF_TMFIFO_CONS_TX_BUF_SIZE;
+
+	*index = next;
+}
+
+/*
+ * Allocate the Rx/Tx vrings for one fifo device.
+ *
+ * On failure, the rings allocated in earlier loop iterations are released
+ * here (the cleanup is inlined because mlxbf_tmfifo_free_vrings() is
+ * defined later in this file), so the caller never sees a half-built set.
+ */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	/* The parent device is the same for every ring; hoist out of loop. */
+	dev = &tm_vdev->vdev.dev;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			goto err_free;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+
+err_free:
+	/* Undo the allocations made before the failing iteration. */
+	while (--i >= 0) {
+		vring = &tm_vdev->vrings[i];
+		size = vring_size(vring->num, vring->align);
+		dma_free_coherent(dev->parent, size, vring->va, vring->dma);
+		vring->va = NULL;
+	}
+	return -ENOMEM;
+}
+
+/*
+ * Free vrings of the fifo device.
+ *
+ * The virtqueue is deleted before its backing ring memory is freed: the
+ * original ordering freed the DMA buffer first and only deleted the vq
+ * when 'va' was set, leaving the vq referencing freed memory (and leaking
+ * the vq entirely if 'va' was already NULL).
+ */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the virtqueue before its ring memory. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/* Tear down all fifo interrupts, clearing the bookkeeping as we go. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		int irq = fifo->irq_info[i].irq;
+
+		if (!irq)
+			continue;
+		/* Clear first so no further work is scheduled for this irq. */
+		fifo->irq_info[i].irq = 0;
+		disable_irq(irq);
+	}
+}
+
+/* Interrupt handler: mark the event pending and kick the worker. */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+	struct mlxbf_tmfifo *fifo = irq_info->fifo;
+
+	/* Only schedule the worker when this event was not already pending. */
+	if (irq_info->index < MLXBF_TM_MAX_IRQ &&
+	    !test_and_set_bit(irq_info->index, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Get the next packet descriptor from the vring. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	/* No new buffers posted by the driver side. */
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	/* Map the free-running avail counter into a ring slot. */
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	/* A head index outside the ring indicates a corrupted vring. */
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/* Release virtio descriptor: post it to the used ring with 'len' bytes. */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	/* Record the head descriptor id and the written length. */
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Sum the buffer lengths of a whole descriptor chain. */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 total = 0;
+
+	while (desc) {
+		total += virtio32_to_cpu(vdev, desc->len);
+		/* Stop at the last link of the chain. */
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)];
+	}
+
+	return total;
+}
+
+/* Release the pending packet (if any) and reset the per-ring packet state. */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		/* A packet is in flight; use its cached head and length. */
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		/* Otherwise pull the next posted descriptor chain. */
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	/* Clear the in-flight packet state. */
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/* Zero the virtio-net header at the start of a network buffer. */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	void *buf = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	memset(buf, 0, sizeof(struct virtio_net_hdr));
+}
+
+/* Fetch the next packet descriptor and record it as the current packet. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc = mlxbf_tmfifo_get_next_desc(vring);
+
+	/* Rx network buffers carry a virtio-net header; clear it up front. */
+	if (is_rx && desc && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/* House-keeping timer: periodically poke Rx and Tx in case an IRQ was lost. */
+static void mlxbf_tmfifo_timer(struct timer_list *arg)
+{
+	struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
+						 timer);
+	int more;
+
+	/*
+	 * Bitwise OR is intentional: both bits must be set even when the
+	 * first test_and_set_bit() returns 0; a logical || would
+	 * short-circuit and skip setting the Tx bit.
+	 */
+	more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) |
+		    !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (more)
+		schedule_work(&fifo->work);
+
+	/* Re-arm the periodic timer. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/* Copy one console packet (a descriptor chain) into the circular Tx buffer. */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	/* Walk the descriptor chain, copying each buffer into tx_buf. */
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		if (len <= MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+			/* Fits without wrapping. */
+			memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+		} else {
+			/* Split the copy around the end of the buffer. */
+			seg = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+			memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf, addr, len - seg);
+		}
+		mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_tail, len);
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/* Move all posted console packets into the output buffer. */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		if (len > mlxbf_tmfifo_vdev_tx_buf_avail(cons)) {
+			/* The packet is dropped, not queued for retry. */
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Number of words currently readable from the Rx FIFO. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 status = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, status);
+}
+
+/* Number of words that can still be written to the Tx FIFO. */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	u64 sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	int tx_reserve;
+
+	/*
+	 * Network traffic leaves a larger reserve so console messages can
+	 * still get through when the FIFO is busy.
+	 */
+	tx_reserve = (vdev_id == VIRTIO_ID_NET) ?
+		fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO : 1;
+
+	return fifo->tx_fifo_size - tx_reserve -
+		FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+}
+
+/* Console Tx (move data from the output buffer into the TmFifo). */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, partial;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf)
+		return;
+
+	/* Return if no data to send. */
+	size = mlxbf_tmfifo_vdev_tx_buf_len(cons);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.u.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	/*
+	 * 'data' and 'data_le' alias the same union storage, so the
+	 * little-endian value prepared here is what writeq() below sends.
+	 */
+	hdr.u.data_le = cpu_to_le64(hdr.u.data);
+	writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf' access. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf + cons->tx_head;
+
+		if (cons->tx_head + sizeof(u64) <=
+		    MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			/* The word wraps around the end of the buffer. */
+			partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
+			memcpy(&data, addr, partial);
+			memcpy((u8 *)&data + partial, cons->tx_buf,
+			       sizeof(u64) - partial);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		/* A trailing partial word still consumes a full FIFO slot. */
+		if (size >= sizeof(u64)) {
+			mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
+							   sizeof(u64));
+			size -= sizeof(u64);
+		} else {
+			mlxbf_tmfifo_vdev_tx_buf_index_inc(&cons->tx_head,
+							   size);
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/* Rx/Tx one 8-byte word of a descriptor buffer through the FIFO. */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	union mlxbf_tmfifo_u64 buf;
+	void *addr;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx) {
+		buf.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+		buf.data = le64_to_cpu(buf.data_le);
+	}
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &buf.data, sizeof(u64));
+		else
+			memcpy(&buf.data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/* Leftover bytes at the end of the buffer. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &buf.data,
+			       len - vring->cur_len);
+		else
+			memcpy(&buf.data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx) {
+		/*
+		 * 'data' and 'data_le' alias the same union storage, so
+		 * writing buf.data below sends the little-endian value
+		 * prepared here.
+		 */
+		buf.data_le = cpu_to_le64(buf.data);
+		writeq(buf.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.u.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+		hdr.u.data = le64_to_cpu(hdr.u.data_le);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			config = &fifo->vdev[vdev_id]->config.net;
+			/*
+			 * NOTE(review): config->mtu is stored in host byte
+			 * order at probe time, so this host-order comparison
+			 * is self-consistent — confirm nothing writes it in
+			 * virtio byte order via the config-set path.
+			 */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.u.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		/* Union aliasing: writeq() sends the LE value prepared here. */
+		hdr.u.data_le = cpu_to_le64(hdr.u.data);
+		writeq(hdr.u.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Start the payload after the (possibly zero-length) virtio header. */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		/* Continue with the partially processed packet. */
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		fifo->vring[is_rx] = NULL;
+
+		/*
+		 * Notify upper layer that packet is done. The spinlock
+		 * serializes against the console notify path, which can run
+		 * with interrupts disabled.
+		 */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/* Rx & Tx processing of a queue: drain/fill the FIFO one descriptor at a time. */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor; 'avail' is decremented inside. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/* Serve one direction (Rx or Tx) for every vdev if its event is pending. */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	int i;
+
+	/* Nothing to do unless the event is pending and the irq is active. */
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (!tm_vdev)
+			continue;
+		vring = &tm_vdev->vrings[queue_id];
+		if (vring->vq)
+			mlxbf_tmfifo_rxtx(vring, is_rx);
+	}
+}
+
+/* Deferred handler: run Tx first, then Rx, serialized by the fifo mutex. */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo = container_of(work, struct mlxbf_tmfifo,
+						 work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Send pending data into the TmFifo. */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Drain incoming data from the TmFifo. */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->index & BIT(0))) {
+		/* Rx: if the event is already pending, work is scheduled. */
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			return true;
+		}
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Report the feature bits offered by this vdev. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return mlxbf_vdev_to_tmfifo(vdev)->features;
+}
+
+/* Record the feature set negotiated by the virtio core. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Tear down the virtqueues created by find_vqs(). */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		struct mlxbf_tmfifo_vring *vring = &tm_vdev->vrings[i];
+		struct virtqueue *vq = vring->vq;
+
+		/* Drop any packet still in flight on this ring. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues over the preallocated vrings. */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		/*
+		 * The ring memory was DMA-allocated at vdev creation; hand
+		 * it to the virtio core here.
+		 */
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	/* Delete any vqs created before the failure. */
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the current virtio status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return mlxbf_vdev_to_tmfifo(vdev)->status;
+}
+
+/* Store the virtio status byte written by the core. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->status = status;
+}
+
+/* Reset the device: clearing the status byte is all that is needed here. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ *
+ * The bounds check is split so that a huge 'offset' or 'len' cannot make
+ * 'offset + len' wrap around (unsigned overflow) and slip past the check.
+ */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	/* Ignore out-of-range reads; overflow-safe form of offset+len check. */
+	if (offset > sizeof(tm_vdev->config) ||
+	    len > sizeof(tm_vdev->config) - offset)
+		return;
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ *
+ * The bounds check is split so that a huge 'offset' or 'len' cannot make
+ * 'offset + len' wrap around (unsigned overflow) and slip past the check.
+ */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	/* Ignore out-of-range writes; overflow-safe form of offset+len check. */
+	if (offset > sizeof(tm_vdev->config) ||
+	    len > sizeof(tm_vdev->config) - offset)
+		return;
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Nothing to do for now. This function is needed to avoid warnings
+ * when the device is released in device_release().
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+	/* All vdev memory is devm-managed; no explicit teardown here. */
+}
+
+/* Virtio config operations implemented by this tmfifo backend. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create one virtio device (console or network) on the tmfifo.
+ *
+ * The console Tx buffer allocation is now checked: failing the device
+ * creation is preferable to registering a console that silently drops
+ * all output (console_tx bails out when tx_buf is NULL).
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	/* Allocate an output buffer for the console device. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = devm_kmalloc(dev,
+					       MLXBF_TMFIFO_CONS_TX_BUF_SIZE,
+					       GFP_KERNEL);
+		if (!tm_vdev->tx_buf) {
+			ret = -ENOMEM;
+			goto vring_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
+		fifo->vdev[vdev_id] = NULL;
+		goto vring_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vring_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Remove a vdev type from the tmfifo, if it was created. */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		/* Unregister first so no new requests reach the rings. */
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Override the default MAC with the EFI persistent variable, if present. */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	u8 buf[6];
+	unsigned long size = sizeof(buf);
+	efi_status_t status;
+
+	status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
+				  buf);
+	/* Only accept a value of exactly the expected length. */
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Set TmFifo thresholds which are used to trigger interrupts. */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	/* Tx low watermark at half the FIFO: interrupt when half drained. */
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	/* Rx high watermark of 1: interrupt as soon as any word arrives. */
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/* Quiesce the fifo (timer, irqs, worker) and delete all vdevs. */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	/* Stop new activity before tearing down the devices. */
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * platform_get_irq() returns a negative errno on failure; that value must
+ * not be handed to devm_request_irq() as if it were a valid irq number,
+ * so it is checked before the request.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct mlxbf_tmfifo *fifo;
+	struct resource *res;
+	int i, ret;
+
+	fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fifo->rx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fifo->tx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+
+		/* Reject a negative errno instead of requesting it as an irq. */
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			return ret;
+		fifo->irq_info[i].irq = ret;
+
+		ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
+				       mlxbf_tmfifo_irq_handler, 0,
+				       "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
+				       NULL, 0);
+	if (ret)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_NET,
+				       MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				       sizeof(net_config));
+	if (ret)
+		goto fail;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return ret;
+}
+
+/* Unbind: release every fifo resource via the common cleanup path. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+	return 0;
+}
+
+/* ACPI id "MLNXBF01" is how BlueField firmware exposes the TmFifo. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-02-28 15:51 ` [PATCH v10] " Liming Sun
@ 2019-03-05 15:34   ` Andy Shevchenko
  2019-03-06 20:00     ` Liming Sun
  0 siblings, 1 reply; 179+ messages in thread
From: Andy Shevchenko @ 2019-03-05 15:34 UTC (permalink / raw)
  To: Liming Sun
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

On Thu, Feb 28, 2019 at 5:51 PM Liming Sun <lsun@mellanox.com> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.

Thank you for an update.

Unfortunately more work is needed. My comments below.

> +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK               GENMASK(8, 0)
> +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK                        GENMASK(8, 0)

> +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK                 GENMASK(7, 0)
> +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK                  GENMASK(7, 0)

> +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK                 GENMASK(7, 0)
> +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK                  GENMASK(15, 8)

> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK         GENMASK(8, 0)
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)

> +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK               GENMASK(8, 0)
> +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK                        GENMASK(8, 0)

> +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK                 GENMASK(7, 0)
> +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK                  GENMASK(7, 0)

> +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK                 GENMASK(7, 0)
> +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK                  GENMASK(15, 8)

> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK         GENMASK(8, 0)
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)

Since two of them have _ULL suffix I'm wondering if you have checked
for side effects on the rest, i.e. if you operate with 64-bit variable
and use something like ~MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK, it may
give you interesting results.

> +#define MLXBF_TMFIFO_TIMER_INTERVAL            (HZ / 10)

> +/**
> + * mlxbf_tmfifo_u64 - Union of 64-bit data
> + * @data - 64-bit data in host byte order
> + * @data_le - 64-bit data in little-endian byte order
> + *
> + * It's expected to send 64-bit little-endian value (__le64) into the TmFifo.
> + * readq() and writeq() expect u64 instead. A union structure is used here
> + * to workaround the explicit casting usage like writeq(*(u64 *)&data_le).
> + */

How do you know what readq()/writeq() does with the data? Is it on all
architectures?
How the endianess conversion affects the actual data?

> +union mlxbf_tmfifo_u64 {
> +       u64 data;
> +       __le64 data_le;
> +};

> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 mlxbf_tmfifo_net_default_mac[6] = {
> +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};

> +#define mlxbf_vdev_to_tmfifo(dev)      \
> +       container_of(dev, struct mlxbf_tmfifo_vdev, vdev)

One line?

> +/* Return the consumed Tx buffer space. */
> +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *tm_vdev)
> +{
> +       int len;
> +
> +       if (tm_vdev->tx_tail >= tm_vdev->tx_head)
> +               len = tm_vdev->tx_tail - tm_vdev->tx_head;
> +       else
> +               len = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - tm_vdev->tx_head +
> +                       tm_vdev->tx_tail;
> +       return len;
> +}

Is this custom implementation of some kind of circ_buf?

> +/* Allocate vrings for the fifo. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       struct device *dev;
> +       dma_addr_t dma;
> +       int i, size;
> +       void *va;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +               vring->fifo = fifo;
> +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> +               vring->align = SMP_CACHE_BYTES;
> +               vring->index = i;
> +               vring->vdev_id = tm_vdev->vdev.id.device;
> +               dev = &tm_vdev->vdev.dev;
> +
> +               size = vring_size(vring->num, vring->align);
> +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> +               if (!va) {

> +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");

And how do you clean previously allocated items?

> +                       return -ENOMEM;
> +               }
> +
> +               vring->va = va;
> +               vring->dma = dma;
> +       }
> +
> +       return 0;
> +}

> +/* Disable interrupts of the fifo device. */
> +static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
> +{
> +       int i, irq;
> +
> +       for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
> +               irq = fifo->irq_info[i].irq;

> +               if (irq) {

I don't think this check is needed if you can guarantee that it has no
staled records.

> +                       fifo->irq_info[i].irq = 0;
> +                       disable_irq(irq);
> +               }
> +       }
> +}

> +/* Get the number of available words in the TmFifo for sending. */
> +static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
> +{
> +       int tx_reserve;
> +       u64 sts;
> +
> +       /* Reserve some room in FIFO for console messages. */
> +       if (vdev_id == VIRTIO_ID_NET)
> +               tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
> +       else
> +               tx_reserve = 1;
> +
> +       sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);

> +       return (fifo->tx_fifo_size - tx_reserve -
> +               FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts));

Redundant parens.
Moreover, consider

u32 count; // or whatever suits for FIELD_GET().
...

sts = readq(...);
count = FIELD_GET(...);
return ...;

> +}

> +       while (size > 0) {
> +               addr = cons->tx_buf + cons->tx_head;
> +
> +               if (cons->tx_head + sizeof(u64) <=
> +                   MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> +                       memcpy(&data, addr, sizeof(u64));
> +               } else {
> +                       partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> +                       memcpy(&data, addr, partial);

> +                       memcpy((u8 *)&data + partial, cons->tx_buf,
> +                              sizeof(u64) - partial);

Unaligned access?!

> +               }

> +               buf.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
> +               buf.data = le64_to_cpu(buf.data_le);

Are you sure this is correct?
How did you test this on BE architectures?

> +       tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);

Is it appropriate use of devm_* ?

> +       if (!tm_vdev) {
> +               ret = -ENOMEM;
> +               goto fail;
> +       }

> +/* Read the configured network MAC address from efi variable. */
> +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> +{
> +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +       efi_status_t status;
> +       unsigned long size;

> +       u8 buf[6];

ETH_ALEN ?

> +
> +       size = sizeof(buf);

Ditto.

> +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> +                                 buf);

> +       if (status == EFI_SUCCESS && size == sizeof(buf))

Ditto.

> +               memcpy(mac, buf, sizeof(buf));

ether_addr_copy().

> +}

> +       memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);

ether_addr_copy()...

> +       mlxbf_tmfifo_get_cfg_mac(net_config.mac);

... but actually above should be part of this function.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-03-05 15:34   ` Andy Shevchenko
@ 2019-03-06 20:00     ` Liming Sun
  2019-03-08 14:44       ` Liming Sun
  0 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-03-06 20:00 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy! Please see my response below. If no further comments, I'll try to post v11 after more testing.

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Tuesday, March 5, 2019 10:34 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Thu, Feb 28, 2019 at 5:51 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
> 
> Thank you for an update.
> 
> Unfortunately more work is needed. My comments below.
> 
> > +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK               GENMASK(8, 0)
> > +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK                        GENMASK(8, 0)
> 
> > +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK                 GENMASK(7, 0)
> > +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK                  GENMASK(7, 0)
> 
> > +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK                 GENMASK(7, 0)
> > +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK                  GENMASK(15, 8)
> 
> > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK         GENMASK(8, 0)
> > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)
> 
> > +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK               GENMASK(8, 0)
> > +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK                        GENMASK(8, 0)
> 
> > +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK                 GENMASK(7, 0)
> > +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK                  GENMASK(7, 0)
> 
> > +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK                 GENMASK(7, 0)
> > +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK                  GENMASK(15, 8)
> 
> > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK         GENMASK(8, 0)
> > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)
> 
> Since two of them have _ULL suffix I'm wondering if you have checked
> for side effects on the rest, i.e. if you operate with 64-bit variable
> and use something like ~MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK, it may
> give you interesting results.

The running system on the SoC is arm64, where BITS_PER_LONG and
BITS_PER_LONG_LONG have the same value. In such a case, the two macros appear
to be the same. But you're right, I should use GENMASK_ULL() to be consistent
and more correct, just in case "CONFIG_64BIT" is not defined somehow.

Will update it in v11.

> 
> > +#define MLXBF_TMFIFO_TIMER_INTERVAL            (HZ / 10)
> 
> > +/**
> > + * mlxbf_tmfifo_u64 - Union of 64-bit data
> > + * @data - 64-bit data in host byte order
> > + * @data_le - 64-bit data in little-endian byte order
> > + *
> > + * It's expected to send 64-bit little-endian value (__le64) into the TmFifo.
> > + * readq() and writeq() expect u64 instead. A union structure is used here
> > + * to workaround the explicit casting usage like writeq(*(u64 *)&data_le).
> > + */
> 
> How do you know what readq()/writeq() does with the data? Is it on all
> architectures?
> How the endianess conversion affects the actual data?

The SoC runs arm64 and supports little endian for now. The FIFO has two sides:
one side is the SoC, the other side is an external host machine which can
access the FIFO via USB or PCIe. The rule is that the 'byte stream' will
stay the same when one side writes 8 bytes and the other side reads
the 8 bytes. So as long as both sides have an agreement on the byte order
it should be fine.

After double-checking the arm64 readq()/writeq() implementation, it appears
that these APIs already do the cpu_to_le64() and le64_to_cpu()
conversion. There's actually no need to make another conversion 
(and shouldn't do it). I'll remove these conversions in v11. The code will
look much cleaner.

> 
> > +union mlxbf_tmfifo_u64 {
> > +       u64 data;
> > +       __le64 data_le;
> > +};
> 
> > +/*
> > + * Default MAC.
> > + * This MAC address will be read from EFI persistent variable if configured.
> > + * It can also be reconfigured with standard Linux tools.
> > + */
> > +static u8 mlxbf_tmfifo_net_default_mac[6] = {
> > +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> 
> > +#define mlxbf_vdev_to_tmfifo(dev)      \
> > +       container_of(dev, struct mlxbf_tmfifo_vdev, vdev)
> 
> One line?

Couldn't fit it into one line within 80 characters.
(Please correct me if you meant a single line even exceeding 80 characters.)

> 
> > +/* Return the consumed Tx buffer space. */
> > +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *tm_vdev)
> > +{
> > +       int len;
> > +
> > +       if (tm_vdev->tx_tail >= tm_vdev->tx_head)
> > +               len = tm_vdev->tx_tail - tm_vdev->tx_head;
> > +       else
> > +               len = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - tm_vdev->tx_head +
> > +                       tm_vdev->tx_tail;
> > +       return len;
> > +}
> 
> Is this custom implementation of some kind of circ_buf?

Yes. I'll try if I could re-use the circ_buf structure and update it in v11.

> 
> > +/* Allocate vrings for the fifo. */
> > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       struct device *dev;
> > +       dma_addr_t dma;
> > +       int i, size;
> > +       void *va;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > +               vring = &tm_vdev->vrings[i];
> > +               vring->fifo = fifo;
> > +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> > +               vring->align = SMP_CACHE_BYTES;
> > +               vring->index = i;
> > +               vring->vdev_id = tm_vdev->vdev.id.device;
> > +               dev = &tm_vdev->vdev.dev;
> > +
> > +               size = vring_size(vring->num, vring->align);
> > +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> > +               if (!va) {
> 
> > +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> 
> And how do you clean previously allocated items?

Fixed. Check the return value of mlxbf_tmfifo_alloc_vrings() and goto 
'register_fail' (probably change to a better name) instead of 'fail'. 
In such case the mlxbf_tmfifo_free_vrings() will be called to clean up
all allocated vrings.

> 
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               vring->va = va;
> > +               vring->dma = dma;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> > +/* Disable interrupts of the fifo device. */
> > +static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
> > +{
> > +       int i, irq;
> > +
> > +       for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
> > +               irq = fifo->irq_info[i].irq;
> 
> > +               if (irq) {
> 
> I don't think this check is needed if you can guarantee that it has no
> staled records.

Yes, it's not needed any more according to the current code.
Will remove it in v11.

> 
> > +                       fifo->irq_info[i].irq = 0;
> > +                       disable_irq(irq);
> > +               }
> > +       }
> > +}
> 
> > +/* Get the number of available words in the TmFifo for sending. */
> > +static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
> > +{
> > +       int tx_reserve;
> > +       u64 sts;
> > +
> > +       /* Reserve some room in FIFO for console messages. */
> > +       if (vdev_id == VIRTIO_ID_NET)
> > +               tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
> > +       else
> > +               tx_reserve = 1;
> > +
> > +       sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
> 
> > +       return (fifo->tx_fifo_size - tx_reserve -
> > +               FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts));
> 
> Redundant parens.
> Moreover, consider
> 
> u32 count; // or whatever suits for FIELD_GET().
> ...
> 
> sts = readq(...);
> count = FIELD_GET(...);
> return ...;

Will update in v11.

> 
> > +}
> 
> > +       while (size > 0) {
> > +               addr = cons->tx_buf + cons->tx_head;
> > +
> > +               if (cons->tx_head + sizeof(u64) <=
> > +                   MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> > +                       memcpy(&data, addr, sizeof(u64));
> > +               } else {
> > +                       partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> > +                       memcpy(&data, addr, partial);
> 
> > +                       memcpy((u8 *)&data + partial, cons->tx_buf,
> > +                              sizeof(u64) - partial);
> 
> Unaligned access?!

The code here is to build and copy 8 bytes from the buffer into the 'data'
variable. The source could be unaligned. For example, 3 bytes are at the
end of the buffer and 5 bytes are at the beginning of the buffer. memcpy()
is used to do byte-stream copy which seems ok. Please correct me if
I misunderstand the comment.

> 
> > +               }
> 
> > +               buf.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
> > +               buf.data = le64_to_cpu(buf.data_le);
> 
> Are you sure this is correct?
> How did you test this on BE architectures?

Thanks for the comment! Same as above, the conversion is not really needed. 
I'll remove them in v11. As for testing, we only have arm64 little-endian Linux
running on the SoC. This conversion doesn't make much difference for the SoC.
As for BE architecture, we mainly verify the other side of the FIFO, which is the
external host like using ppc64.

> 
> > +       tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
> 
> Is it appropriate use of devm_* ?

This is SoC, the device won't be closed or detached. The only case is when
the driver is unloaded. So it appears ok to use devm_kzalloc() since it's
allocated during probe() and released during module unload . Please
correct me if I misunderstand it.

> 
> > +       if (!tm_vdev) {
> > +               ret = -ENOMEM;
> > +               goto fail;
> > +       }
> 
> > +/* Read the configured network MAC address from efi variable. */
> > +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> > +{
> > +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> > +       efi_status_t status;
> > +       unsigned long size;
> 
> > +       u8 buf[6];
> 
> ETH_ALEN ?

Will update it in v11

> 
> > +
> > +       size = sizeof(buf);
> 
> Ditto.

Will update it in v11

> 
> > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > +                                 buf);
> 
> > +       if (status == EFI_SUCCESS && size == sizeof(buf))
> 
> Ditto.

Will update it in v11

> 
> > +               memcpy(mac, buf, sizeof(buf));
> 
> ether_addr_copy().

Will update it in v11

> 
> > +}
> 
> > +       memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
> 
> ether_addr_copy()...
> 
> > +       mlxbf_tmfifo_get_cfg_mac(net_config.mac);
> 
> ... but actually above should be part of this function.

Will update it in v11

> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v11] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (46 preceding siblings ...)
  (?)
@ 2019-03-08 14:41 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-03-08 14:41 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line within 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/32*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change date type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don’t use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on  VIRTIO_ in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1286 +++++++++++++++++++++++++
 4 files changed, 1361 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+          platform driver support for the TmFifo which supports console
+          and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..0a31ffa
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1286 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * struct mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types; also the bit index used in mlxbf_tmfifo.pend_events. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx); index into mlxbf_tmfifo_vdev.vrings. */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * struct mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * struct mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array (MLXBF_TM_xxx_IRQ)
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * struct mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @pdev: platform device
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx fifo
+ * @tx_base: mapped register base address for the Tx fifo
+ * @rx_fifo_size: number of entries of the Rx fifo
+ * @tx_fifo_size: number of entries of the Tx fifo
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: vring currently being drained/filled ([0] for Tx, [1] for Rx)
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct platform_device *pdev;
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * union mlxbf_tmfifo_msg_hdr - TmFifo message header (one 64-bit FIFO word)
+ * @type: message type (VIRTIO_ID_xxx)
+ * @len: payload length in network byte order
+ * @unused: reserved, pads the header to 8 bytes
+ * @data: the whole header as one raw 64-bit word for FIFO read/write
+ */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;
+		__be16 len;
+		u8 unused[5];
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	(BIT_ULL(VIRTIO_NET_F_MTU) | \
+					 BIT_ULL(VIRTIO_NET_F_STATUS) | \
+					 BIT_ULL(VIRTIO_NET_F_MAC))
+
+/* Convert a virtio_device pointer to its containing tmfifo vdev. */
+#define mlxbf_vdev_to_tmfifo(dev)	\
+	container_of(dev, struct mlxbf_tmfifo_vdev, vdev)
+
+/*
+ * Allocate vrings for the fifo.
+ *
+ * Allocates DMA-coherent memory for both the Rx and Tx vring of the given
+ * virtual device. Returns 0 on success or -ENOMEM on allocation failure;
+ * on failure the caller is expected to release the partially-allocated
+ * vrings via mlxbf_tmfifo_free_vrings().
+ */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		/* Allocate from the parent (platform) device for DMA. */
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the fifo device.
+ *
+ * Safe to call on partially-initialized devices: vrings whose memory was
+ * never allocated (va == NULL) are skipped, and the virtqueue is only
+ * deleted if it was created.
+ */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/*
+ * Disable interrupts of the fifo device.
+ *
+ * Clears each irq number first so the interrupt handler and worker see the
+ * slot as inactive, then disables the line. Slots that were never requested
+ * or were already cleared (irq == 0) are skipped: passing 0 to disable_irq()
+ * would act on an invalid interrupt number.
+ */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		fifo->irq_info[i].irq = 0;
+		if (irq)
+			disable_irq(irq);
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Marks the corresponding event as pending and defers the actual FIFO
+ * processing to the workqueue. The work is only scheduled when the bit
+ * transitions from clear to set, avoiding redundant scheduling.
+ */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *info = arg;
+	struct mlxbf_tmfifo *fifo = info->fifo;
+
+	if (info->index >= MLXBF_TM_MAX_IRQ)
+		return IRQ_HANDLED;
+
+	if (!test_and_set_bit(info->index, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when no
+ * new buffer has been posted. Advances the driver's private 'next_avail'
+ * cursor on success.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	/* Nothing posted since the last time we looked. */
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	/* An out-of-range head index indicates a corrupted avail ring. */
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/*
+ * Release virtio descriptor.
+ *
+ * Puts the finished chain (identified by its head 'desc') into the used
+ * ring with the number of bytes written ('len'), then publishes it by
+ * incrementing the used index.
+ */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/*
+ * Get the total length of the descriptor chain.
+ *
+ * Walks the chain starting at 'desc', following VRING_DESC_F_NEXT links,
+ * and sums the buffer lengths.
+ */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the current pending packet of the vring, if any, back to the
+ * used ring and reset the per-vring packet state. When no packet is in
+ * flight, it consumes and releases the next posted chain instead (used to
+ * drop a packet, e.g. when the console buffer is full).
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/* Zero-initialize the virtio-net header at the front of a receive buffer. */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	void *hdr_addr;
+
+	hdr_addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(hdr_addr, 0, sizeof(struct virtio_net_hdr));
+}
+
+/*
+ * Get and initialize the next packet.
+ *
+ * Pulls the next descriptor chain from the vring and records it as the
+ * current pending packet. For network Rx buffers the virtio-net header is
+ * zeroed up front. Returns NULL when no buffer is available.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically kicks Rx and Tx processing in case an interrupt was missed
+ * or no level change retriggers one, then re-arms itself.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *arg)
+{
+	struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
+						 timer);
+	int more;
+
+	/* Schedule work only if at least one event bit was newly set. */
+	more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) ||
+		    !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (more)
+		schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet into the output buffer.
+ *
+ * Walks the descriptor chain and copies its payload into the console's
+ * circular Tx buffer, wrapping at the buffer end. The caller has already
+ * verified that enough space is available (see mlxbf_tmfifo_console_output)
+ * and holds fifo->spin_lock.
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		/* Copy in up to two segments to handle the wrap-around. */
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * Drains all posted console Tx packets into the circular buffer. A packet
+ * that does not fit (keeping MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE in reserve)
+ * is dropped by releasing it back to the used ring, and draining stops.
+ * Called with fifo->spin_lock held.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Return how many 64-bit words are waiting in the Rx FIFO. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 status = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, status);
+}
+
+/*
+ * Return how many 64-bit words can still be pushed into the Tx FIFO for
+ * the given device. Network traffic keeps 1/MLXBF_TMFIFO_RESERVE_RATIO of
+ * the FIFO in reserve so console output is never starved.
+ */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	u64 sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	u32 count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+	int reserve;
+
+	reserve = (vdev_id == VIRTIO_ID_NET) ?
+		  fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO : 1;
+
+	return fifo->tx_fifo_size - reserve - count;
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ *
+ * Writes one message header followed by up to 'avail' words of buffered
+ * console data into the Tx FIFO. 'avail' was sampled by the caller, so the
+ * payload is clamped to fit.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/*
+	 * Use spin-lock to protect the 'cons->tx_buf'.
+	 * NOTE(review): 'head' was sampled above without the lock; only the
+	 * tail is advanced here, so a concurrent producer can only add data
+	 * that will be picked up on the next pass — confirm this is intended.
+	 */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		/* Assemble one 64-bit word, handling the wrap-around. */
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/*
+ * Rx/Tx one word in the descriptor buffer.
+ *
+ * Moves one 64-bit word between the FIFO and the descriptor buffer at
+ * offset vring->cur_len, advancing cur_len (capped at 'len' for a partial
+ * trailing word).
+ */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data = 0;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/*
+		 * Leftover bytes. In the Tx case only the first
+		 * (len - cur_len) bytes of 'data' are filled from the buffer;
+		 * 'data' is zero-initialized above so the pad bytes written
+		 * to the FIFO are zeros rather than uninitialized stack
+		 * contents, which would leak kernel memory to the external
+		 * host reading the FIFO.
+		 */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			/*
+			 * NOTE(review): fifo->vdev[vdev_id] is dereferenced
+			 * here before the NULL check on tm_dev2 below; this
+			 * looks safe only because is_ready gates the worker
+			 * until both vdevs exist — confirm.
+			 */
+			config = &fifo->vdev[vdev_id]->config.net;
+			/* Drop over-sized packets. */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		/* One FIFO word was consumed/produced for the header. */
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * Drains/fills the FIFO one descriptor at a time until the FIFO has no
+ * more data (Rx) or no more room (Tx), or the pending packet moves to a
+ * different vring.
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues.
+ *
+ * Processes the given queue of every registered vdev if the corresponding
+ * event bit is pending and the interrupt is still active.
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx and Tx case.
+ *
+ * Serializes all deferred FIFO processing under fifo->lock. Tx is handled
+ * before Rx.
+ */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Rx (Receive data from the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Rx buffers and non-console Tx buffers are handled asynchronously by
+ * setting the pending event bit and scheduling the worker; console Tx is
+ * copied into the circular buffer immediately (see comment below).
+ * Always returns true.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->index & BIT(0))) {
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			return true;
+		}
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Return the feature bits supported by this device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return mlxbf_vdev_to_tmfifo(vdev)->features;
+}
+
+/* Record the feature bits negotiated by the virtio core. Always succeeds. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Any in-flight packet is released back to the used ring before the
+ * virtqueue itself is deleted. The DMA ring memory is kept (freed by
+ * mlxbf_tmfifo_free_vrings) so find_vqs() can be called again.
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested queue on top of the DMA memory that
+ * was allocated in mlxbf_tmfifo_alloc_vrings(). On any failure all queues
+ * created so far are torn down. Returns 0 on success or a negative errno.
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Return the virtio status byte of this device. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return mlxbf_vdev_to_tmfifo(vdev)->status;
+}
+
+/* Store the virtio status byte of this device. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->status = status;
+}
+
+/* Reset the device by clearing its status byte. Not much here for now. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ * Out-of-range accesses are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	u8 *cfg = (u8 *)&tm_vdev->config;
+
+	if (offset + len <= sizeof(tm_vdev->config))
+		memcpy(buf, cfg + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ * Out-of-range accesses are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	u8 *cfg = (u8 *)&tm_vdev->config;
+
+	if (offset + len <= sizeof(tm_vdev->config))
+		memcpy(cfg + offset, buf, len);
+}
+
+/*
+ * Nothing to do for now. This function is needed to avoid warnings
+ * when the device is released in device_release(). The vdev memory itself
+ * is devm-managed, so no explicit free is required here.
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Virtio config operations, shared by the console and network vdevs. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * Allocates and registers a virtio device of the given VIRTIO_ID_xxx type
+ * on top of the shared FIFO. Returns 0 on success, -EEXIST if the type is
+ * already registered, or -ENOMEM / the register_virtio_device() error on
+ * failure (all allocated vrings are released on the failure paths).
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/*
+	 * Allocate an output buffer for the console device. The allocation
+	 * must be checked: mlxbf_tmfifo_console_output() copies into
+	 * tx_buf.buf without a NULL check, so proceeding with a failed
+	 * allocation would dereference NULL on the first console write.
+	 */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+		if (!tm_vdev->tx_buf.buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo.
+ *
+ * Unregisters the virtio device and frees its vrings under fifo->lock.
+ * Calling it for an id that was never created is a no-op. Always returns 0.
+ */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable; fall back to the built-in default when the variable is absent
+ * or has an unexpected size.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long len = ETH_ALEN;
+	u8 tmp[ETH_ALEN];
+	efi_status_t rc;
+
+	rc = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &len, tmp);
+	if (rc != EFI_SUCCESS || len != ETH_ALEN) {
+		memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);
+		return;
+	}
+
+	ether_addr_copy(mac, tmp);
+}
+
+/*
+ * Set TmFifo thresholds which are used to trigger interrupts.
+ *
+ * Reads the FIFO sizes from the MAX_ENTRIES fields and programs the
+ * low/high watermarks: Tx interrupts when half-drained, Rx interrupts as
+ * soon as a single word arrives.
+ */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the tmfifo: mark it not ready first so the worker bails out,
+ * then stop the timer and interrupts, flush pending work, and delete all
+ * virtual devices. Used from both the probe error path and remove().
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx register blocks, requests the four FIFO interrupts,
+ * programs the watermarks, and creates the console and network virtio
+ * devices. Processing is only enabled (is_ready) once everything is set
+ * up; the worker refuses to run before that.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct mlxbf_tmfifo *fifo;
+	struct resource *res;
+	int i, ret;
+
+	fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fifo->rx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fifo->tx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+		ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
+				       mlxbf_tmfifo_irq_handler, 0,
+				       "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
+				       NULL, 0);
+	if (ret)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_NET,
+				       MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				       sizeof(net_config));
+	if (ret)
+		goto fail;
+
+	/* Start the house-keeping timer only once everything is in place. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return ret;
+}
+
+/* Device remove function: tear everything down. Always returns 0. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+
+	return 0;
+}
+
+/* ACPI id the BlueField firmware exposes for the TmFifo device. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* RE: [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-03-06 20:00     ` Liming Sun
@ 2019-03-08 14:44       ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-03-08 14:44 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Andy,

The v11 has been posted.

Thanks!
Liming

> -----Original Message-----
> From: Liming Sun
> Sent: Wednesday, March 6, 2019 3:01 PM
> To: 'Andy Shevchenko' <andy.shevchenko@gmail.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: RE: [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> Thanks Andy! Please see my response below. If no further comments, I'll try to post v11 after more testing.
> 
> Regards,
> Liming
> 
> > -----Original Message-----
> > From: Andy Shevchenko <andy.shevchenko@gmail.com>
> > Sent: Tuesday, March 5, 2019 10:34 AM
> > To: Liming Sun <lsun@mellanox.com>
> > Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> > Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> > x86@vger.kernel.org>
> > Subject: Re: [PATCH v10] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> >
> > On Thu, Feb 28, 2019 at 5:51 PM Liming Sun <lsun@mellanox.com> wrote:
> > >
> > > This commit adds the TmFifo platform driver for Mellanox BlueField
> > > Soc. TmFifo is a shared FIFO which enables external host machine
> > > to exchange data with the SoC via USB or PCIe. The driver is based
> > > on virtio framework and has console and network access enabled.
> >
> > Thank you for an update.
> >
> > Unfortunately more work is needed. My comments below.
> >
> > > +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK               GENMASK(8, 0)
> > > +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK                        GENMASK(8, 0)
> >
> > > +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK                 GENMASK(7, 0)
> > > +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK                  GENMASK(7, 0)
> >
> > > +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK                 GENMASK(7, 0)
> > > +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK                  GENMASK(15, 8)
> >
> > > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK         GENMASK(8, 0)
> > > +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)
> >
> > > +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK               GENMASK(8, 0)
> > > +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK                        GENMASK(8, 0)
> >
> > > +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK                 GENMASK(7, 0)
> > > +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK                  GENMASK(7, 0)
> >
> > > +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK                 GENMASK(7, 0)
> > > +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK                  GENMASK(15, 8)
> >
> > > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK         GENMASK(8, 0)
> > > +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)
> >
> > Since two of them have _ULL suffix I'm wondering if you have checked
> > for side effects on the rest, i.e. if you operate with 64-bit variable
> > and use something like ~MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK, it may
> > give you interesting results.
> 
> The running system on the SoC is arm64 where BITS_PER_LONG and
> BITS_PER_LONG_LONG have the same value. In such a case, the two macros appear
> to be the same. But you're right, I should use GENMASK_ULL() to be consistent
> and more correctly, just in case the "CONFIG_64BIT" is not defined somehow.
> 
> Will update it in v11.
> 
> >
> > > +#define MLXBF_TMFIFO_TIMER_INTERVAL            (HZ / 10)
> >
> > > +/**
> > > + * mlxbf_tmfifo_u64 - Union of 64-bit data
> > > + * @data - 64-bit data in host byte order
> > > + * @data_le - 64-bit data in little-endian byte order
> > > + *
> > > + * It's expected to send 64-bit little-endian value (__le64) into the TmFifo.
> > > + * readq() and writeq() expect u64 instead. A union structure is used here
> > > + * to workaround the explicit casting usage like writeq(*(u64 *)&data_le).
> > > + */
> >
> > How do you know what readq()/writeq() does with the data? Is it on all
> > architectures?
> > How the endianess conversion affects the actual data?
> 
> The SoC runs arm64 and supports little endian for now. The FIFO has two sides,
> one side is the SoC, the other side is extern host machine which could
> access the FIFO via USB or PCIe. The rule is that the 'byte stream' will
> stay the same when one side writes 8 bytes and the other side reads
> the 8 bytes. So as long as both sides have agreement on the byte-order
> it should be fine.
> 
> After double check the arm64 readq()/writeq() implementation, it appears
> that these APIs already does cpu_to_le64() and le64_to_cpu()
> conversion. There's actually no need to make another conversion
> (and shouldn't do it). I'll remove these conversions in v11. The code will
> look much cleaner.
> 
> >
> > > +union mlxbf_tmfifo_u64 {
> > > +       u64 data;
> > > +       __le64 data_le;
> > > +};
> >
> > > +/*
> > > + * Default MAC.
> > > + * This MAC address will be read from EFI persistent variable if configured.
> > > + * It can also be reconfigured with standard Linux tools.
> > > + */
> > > +static u8 mlxbf_tmfifo_net_default_mac[6] = {
> > > +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> >
> > > +#define mlxbf_vdev_to_tmfifo(dev)      \
> > > +       container_of(dev, struct mlxbf_tmfifo_vdev, vdev)
> >
> > One line?
> 
> Couldn't fit it into one line within 80 characters.
> (Please correct me if you meant a single line even exceeding 80 characters).
> 
> >
> > > +/* Return the consumed Tx buffer space. */
> > > +static int mlxbf_tmfifo_vdev_tx_buf_len(struct mlxbf_tmfifo_vdev *tm_vdev)
> > > +{
> > > +       int len;
> > > +
> > > +       if (tm_vdev->tx_tail >= tm_vdev->tx_head)
> > > +               len = tm_vdev->tx_tail - tm_vdev->tx_head;
> > > +       else
> > > +               len = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - tm_vdev->tx_head +
> > > +                       tm_vdev->tx_tail;
> > > +       return len;
> > > +}
> >
> > Is this custom implementation of some kind of circ_buf?
> 
> Yes. I'll try if I could re-use the circ_buf structure and update it in v11.
> 
> >
> > > +/* Allocate vrings for the fifo. */
> > > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > > +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> > > +{
> > > +       struct mlxbf_tmfifo_vring *vring;
> > > +       struct device *dev;
> > > +       dma_addr_t dma;
> > > +       int i, size;
> > > +       void *va;
> > > +
> > > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > > +               vring = &tm_vdev->vrings[i];
> > > +               vring->fifo = fifo;
> > > +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> > > +               vring->align = SMP_CACHE_BYTES;
> > > +               vring->index = i;
> > > +               vring->vdev_id = tm_vdev->vdev.id.device;
> > > +               dev = &tm_vdev->vdev.dev;
> > > +
> > > +               size = vring_size(vring->num, vring->align);
> > > +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> > > +               if (!va) {
> >
> > > +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> >
> > And how do you clean previously allocated items?
> 
> Fixed. Check the return value of mlxbf_tmfifo_alloc_vrings() and goto
> 'register_fail' (probably change to a better name) instead of 'fail'.
> In such case the mlxbf_tmfifo_free_vrings() will be called to clean up
> all allocated vrings.
> 
> >
> > > +                       return -ENOMEM;
> > > +               }
> > > +
> > > +               vring->va = va;
> > > +               vring->dma = dma;
> > > +       }
> > > +
> > > +       return 0;
> > > +}
> >
> > > +/* Disable interrupts of the fifo device. */
> > > +static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
> > > +{
> > > +       int i, irq;
> > > +
> > > +       for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
> > > +               irq = fifo->irq_info[i].irq;
> >
> > > +               if (irq) {
> >
> > I don't think this check is needed if you can guarantee that it has no
> > staled records.
> 
> Yes, it's not needed any more according to the current code.
> Will remove it in v11.
> 
> >
> > > +                       fifo->irq_info[i].irq = 0;
> > > +                       disable_irq(irq);
> > > +               }
> > > +       }
> > > +}
> >
> > > +/* Get the number of available words in the TmFifo for sending. */
> > > +static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
> > > +{
> > > +       int tx_reserve;
> > > +       u64 sts;
> > > +
> > > +       /* Reserve some room in FIFO for console messages. */
> > > +       if (vdev_id == VIRTIO_ID_NET)
> > > +               tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
> > > +       else
> > > +               tx_reserve = 1;
> > > +
> > > +       sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
> >
> > > +       return (fifo->tx_fifo_size - tx_reserve -
> > > +               FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts));
> >
> > Redundant parens.
> > Moreover, consider
> >
> > u32 count; // or whatever suits for FIELD_GET().
> > ...
> >
> > sts = readq(...);
> > count = FIELD_GET(...);
> > return ...;
> 
> Will update in v11.
> 
> >
> > > +}
> >
> > > +       while (size > 0) {
> > > +               addr = cons->tx_buf + cons->tx_head;
> > > +
> > > +               if (cons->tx_head + sizeof(u64) <=
> > > +                   MLXBF_TMFIFO_CONS_TX_BUF_SIZE) {
> > > +                       memcpy(&data, addr, sizeof(u64));
> > > +               } else {
> > > +                       partial = MLXBF_TMFIFO_CONS_TX_BUF_SIZE - cons->tx_head;
> > > +                       memcpy(&data, addr, partial);
> >
> > > +                       memcpy((u8 *)&data + partial, cons->tx_buf,
> > > +                              sizeof(u64) - partial);
> >
> > Unaligned access?!
> 
> The code here is to build and copy 8 bytes from the buffer into the 'data'
> variable. The source could be unaligned. For example, 3 bytes are at the
> end of the buffer and 5 bytes are at the beginning of the buffer. memcpy()
> is used to do byte-stream copy which seems ok. Please correct me if
> I misunderstand the comment.
> 
> >
> > > +               }
> >
> > > +               buf.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
> > > +               buf.data = le64_to_cpu(buf.data_le);
> >
> > Are you sure this is correct?
> > How did you test this on BE architectures?
> 
> Thanks for the comment! Same as above, the conversion is not really needed.
> I'll remove them in v11. As for testing, we only have arm64 little-endian Linux
> running on the SoC. This conversion doesn't make much difference for the SoC.
> As for BE architecture, we mainly verify the other side of the FIFO, which is the
> external host like using ppc64.
> 
> >
> > > +       tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
> >
> > Is it appropriate use of devm_* ?
> 
> This is SoC, the device won't be closed or detached. The only case is when
> the driver is unloaded. So it appears ok to use devm_kzalloc() since it's
> allocated during probe() and released during module unload. Please
> correct me if I misunderstand it.
> 
> >
> > > +       if (!tm_vdev) {
> > > +               ret = -ENOMEM;
> > > +               goto fail;
> > > +       }
> >
> > > +/* Read the configured network MAC address from efi variable. */
> > > +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> > > +{
> > > +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> > > +       efi_status_t status;
> > > +       unsigned long size;
> >
> > > +       u8 buf[6];
> >
> > ETH_ALEN ?
> 
> Will update it in v11
> 
> >
> > > +
> > > +       size = sizeof(buf);
> >
> > Ditto.
> 
> Will update it in v11
> 
> >
> > > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > > +                                 buf);
> >
> > > +       if (status == EFI_SUCCESS && size == sizeof(buf))
> >
> > Ditto.
> 
> Will update it in v11
> 
> >
> > > +               memcpy(mac, buf, sizeof(buf));
> >
> > ether_addr_copy().
> 
> Will update it in v11
> 
> >
> > > +}
> >
> > > +       memcpy(net_config.mac, mlxbf_tmfifo_net_default_mac, 6);
> >
> > ether_addr_copy()...
> >
> > > +       mlxbf_tmfifo_get_cfg_mac(net_config.mac);
> >
> > ... but actually above should be part of this function.
> 
> Will update it in v11
> 
> >
> > --
> > With Best Regards,
> > Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-03 19:17   ` Liming Sun
@ 2019-03-15 13:18     ` Matthias Brugger
  -1 siblings, 0 replies; 179+ messages in thread
From: Matthias Brugger @ 2019-03-15 13:18 UTC (permalink / raw)
  To: Liming Sun, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, linux-arm-kernel



On 03/01/2019 20:17, Liming Sun wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/soc/Kconfig                |    1 +
>  drivers/soc/Makefile               |    1 +
>  drivers/soc/mellanox/Kconfig       |   18 +
>  drivers/soc/mellanox/Makefile      |    5 +
>  drivers/soc/mellanox/tmfifo.c      | 1244 ++++++++++++++++++++++++++++++++++++
>  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
>  6 files changed, 1345 insertions(+)
>  create mode 100644 drivers/soc/mellanox/Kconfig
>  create mode 100644 drivers/soc/mellanox/Makefile
>  create mode 100644 drivers/soc/mellanox/tmfifo.c
>  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
> 
[..]
> diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> new file mode 100644
> index 0000000..2975229
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo.c
[..]

> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* House-keeping timer interval. */
> +static int tmfifo_timer_interval = HZ / 10;
> +module_param(tmfifo_timer_interval, int, 0644);
> +MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
> +
> +/* Global lock. */
> +static DEFINE_MUTEX(tmfifo_lock);

Why do we need that? To synchronize between different tmfifo driver instances?

> +
> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> +module_param(tmfifo_vring_size, int, 0444);
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
> +
> +/* Struct declaration. */
> +struct tmfifo;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Structure to maintain the ring state. */
> +struct tmfifo_vring {
> +	void *va;			/* virtual address */
> +	dma_addr_t dma;			/* dma address */
> +	struct virtqueue *vq;		/* virtqueue pointer */
> +	struct vring_desc *desc;	/* current desc */
> +	struct vring_desc *desc_head;	/* current desc head */
> +	int cur_len;			/* processed len in current desc */
> +	int rem_len;			/* remaining length to be processed */
> +	int size;			/* vring size */
> +	int align;			/* vring alignment */
> +	int id;				/* vring id */
> +	int vdev_id;			/* TMFIFO_VDEV_xxx */
> +	u32 pkt_len;			/* packet total length */
> +	__virtio16 next_avail;		/* next avail desc id */
> +	struct tmfifo *fifo;		/* pointer back to the tmfifo */
> +};
> +
> +/* Interrupt types. */
> +enum {
> +	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
> +	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
> +	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
> +	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
> +	TM_IRQ_CNT
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +	TMFIFO_VRING_RX,		/* Rx ring */
> +	TMFIFO_VRING_TX,		/* Tx ring */
> +	TMFIFO_VRING_NUM
> +};
> +
> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};
> +
> +struct tmfifo_irq_info {
> +	struct tmfifo *fifo;		/* tmfifo structure */
> +	int irq;			/* interrupt number */
> +	int index;			/* array index */
> +};
> +
> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;

From what I understand, we use this lock to synchronize between
tmfifo_create_vdev, tmfifo_delete_vdev and tmfifo_work_handler.

Create happens in probe and delete in remove, so we don't need a lock here.
So the only reason I can see we need this lock here is, to make sure that we
don't mess up between create a vdev and being already in the work handler. That
can only happen, if an IRQ was triggered. If we enable the IRQs after creating
the vdev, we don't need the lock at all.

> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	struct tmfifo_irq_info irq_info[TM_IRQ_CNT];	/* irq info */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +	bool is_ready;			/* ready flag */
> +	spinlock_t spin_lock;		/* spin lock */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> +
> +/* MTU setting of the virtio-net interface. */
> +#define TMFIFO_NET_MTU		1500
> +
> +/* Supported virtio-net features. */
> +#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
> +				 (1UL << VIRTIO_NET_F_STATUS) | \
> +				 (1UL << VIRTIO_NET_F_MAC))
> +
> +/* Return the available Tx buffer space. */
> +static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
> +{
> +	return ((vdev->tx_tail >= vdev->tx_head) ?
> +		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
> +		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));

Why do we need to subtract 8 from the available buffer size?

> +}
> +
> +/* Update Tx buffer pointer after pushing data. */
> +static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
> +{
> +	vdev->tx_tail += len;
> +	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;

I would have expected
vdev->tx_tail = (vdev->tx_tail + len) % TMFIFO_CONS_TX_BUF_SIZE;

But I suppose your code executes faster.
What I miss is some code to assure that no ring buffer overflow/underrun can happen.

> +}
> +
> +/* Update Tx buffer pointer after popping data. */
> +static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
> +{
> +	vdev->tx_head += len;
> +	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
> +}
> +

[...]

> +
> +/* Rx & Tx processing of a virtual queue. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)> +{
> +	struct tmfifo_vring *vring;
> +	struct tmfifo *fifo;
> +	struct vring *vr;
> +	struct virtio_device *vdev;
> +	u64 sts, data;
> +	int num_avail = 0, hdr_len, tx_reserve;
> +	void *addr;
> +	u32 len, idx;
> +	struct vring_desc *desc;
> +	unsigned long flags;
> +	struct tmfifo_vdev *cons;
> +
> +	if (!vq)
> +		return;
> +
> +	vring = (struct tmfifo_vring *)vq->priv;


You can pass strict tmfifo_vring* instead of virtqueue as function parameter,
then you don't have to do this.


> +	fifo = vring->fifo;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!fifo->vdev[vring->vdev_id])
> +		return;
> +	vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +
> +	/* Don't continue if another vring is running. */
> +	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)

How can that happen?

> +		return;
> +
> +	/* tx_reserve is used to reserved some room in FIFO for console. */
> +	if (vring->vdev_id == VIRTIO_ID_NET) {
> +		hdr_len = sizeof(struct virtio_net_hdr);
> +		tx_reserve = fifo->tx_fifo_size / 16;
> +	} else {
> +		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
> +		hdr_len = 0;
> +		tx_reserve = 1;
> +	}
> +
> +	desc = vring->desc;
> +
[...]
> +
> +/* Work handler for Rx, Tx or activity monitoring. */
> +static void tmfifo_work_handler(struct work_struct *work)
> +{
> +	int i;
> +	struct tmfifo_vdev *tm_vdev;
> +	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
> +
> +	if (!fifo->is_ready)
> +		return;
> +
> +	mutex_lock(&fifo->lock);
> +

So you don't want to queue up more work when remove is called. As is_ready is
not atomic you could deadlock here:
  remove             work_handler
 mutex_lock
                    if(!is_ready)
                   mutex_lock <- sleeps
is_ready = false
    ...
cancel_work_sync <- deadlock

> +	/* Tx. */
> +	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
> +		       fifo->irq_info[TM_TX_LWM_IRQ].irq) {
> +		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				tmfifo_virtio_rxtx(
> +					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
> +					false);
> +			}
> +		}
> +	}
> +
> +	/* Rx. */
> +	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
> +		       fifo->irq_info[TM_RX_HWM_IRQ].irq) {
> +		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				tmfifo_virtio_rxtx(
> +					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
> +					true);
> +			}
> +		}
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +}
[...]

> +
> +/* Probe the TMFIFO. */
> +static int tmfifo_probe(struct platform_device *pdev)
> +{
> +	u64 ctl;
> +	struct tmfifo *fifo;
> +	struct resource *rx_res, *tx_res;
> +	struct virtio_net_config net_config;
> +	int i, ret;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {

Can't we use devm_request_mem_region and get rid of the release_mem_region here
and in the remove function?

Regards,
Matthias

> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> +	fifo->timer.function = tmfifo_timer;
> +
> +	for (i = 0; i < TM_IRQ_CNT; i++) {
> +		fifo->irq_info[i].index = i;
> +		fifo->irq_info[i].fifo = fifo;
> +		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq_info[i].irq, tmfifo_irq_handler, 0,
> +				  "tmfifo", &fifo->irq_info[i]);
> +		if (ret) {
> +			pr_err("Unable to request irq\n");
> +			fifo->irq_info[i].irq = 0;
> +			goto err;
> +		}
> +	}
> +
> +	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
> +	if (!fifo->rx_base)
> +		goto err;
> +
> +	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
> +	if (!fifo->tx_base)
> +		goto err;
> +
> +	/* Get Tx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
> +	fifo->tx_fifo_size =
> +		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
> +	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
> +		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
> +	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
> +		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
> +	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
> +
> +	/* Get Rx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
> +	fifo->rx_fifo_size =
> +		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
> +	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
> +		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
> +	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
> +		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
> +	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
> +
> +	mutex_init(&fifo->lock);
> +
> +	/* Create the console vdev. */
> +	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +	if (ret)
> +		goto err;
> +
> +	/* Create the network vdev. */
> +	memset(&net_config, 0, sizeof(net_config));
> +	net_config.mtu = TMFIFO_NET_MTU;
> +	net_config.status = VIRTIO_NET_S_LINK_UP;
> +	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
> +	tmfifo_get_cfg_mac(net_config.mac);
> +	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
> +				 &net_config, sizeof(net_config));
> +	if (ret)
> +		goto err;
> +
> +	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
> +
> +	fifo->is_ready = true;
> +
> +	return 0;
> +
> +err:
> +	tmfifo_remove(pdev);
> +early_err:
> +	dev_err(&pdev->dev, "Probe Failed\n");
> +	return ret;
> +}
> +
> +static const struct of_device_id tmfifo_match[] = {
> +	{ .compatible = "mellanox,bf-tmfifo" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, tmfifo_match);
> +
> +static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
> +	{ "MLNXBF01", 0 },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
> +
> +static struct platform_driver tmfifo_driver = {
> +	.probe = tmfifo_probe,
> +	.remove = tmfifo_remove,
> +	.driver = {
> +		.name = "bf-tmfifo",
> +		.of_match_table = tmfifo_match,
> +		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
> +	},
> +};
> +
> +static int __init tmfifo_init(void)
> +{
> +	int ret;
> +
> +	ret = platform_driver_register(&tmfifo_driver);
> +	if (ret)
> +		pr_err("Failed to register tmfifo driver.\n");
> +
> +	return ret;
> +}
> +
> +static void __exit tmfifo_exit(void)
> +{
> +	platform_driver_unregister(&tmfifo_driver);
> +}
> +
> +module_init(tmfifo_init);
> +module_exit(tmfifo_exit);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Mellanox Technologies");
> diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
> new file mode 100644
> index 0000000..9f21764
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo_regs.h
> @@ -0,0 +1,76 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef __TMFIFO_REGS_H__
> +#define __TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +
> +#define TMFIFO_TX_DATA 0x0
> +
> +#define TMFIFO_TX_STS 0x8
> +#define TMFIFO_TX_STS__LENGTH 0x0001
> +#define TMFIFO_TX_STS__COUNT_SHIFT 0
> +#define TMFIFO_TX_STS__COUNT_WIDTH 9
> +#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
> +#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> +#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
> +
> +#define TMFIFO_TX_CTL 0x10
> +#define TMFIFO_TX_CTL__LENGTH 0x0001
> +#define TMFIFO_TX_CTL__LWM_SHIFT 0
> +#define TMFIFO_TX_CTL__LWM_WIDTH 8
> +#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
> +#define TMFIFO_TX_CTL__LWM_RMASK 0xff
> +#define TMFIFO_TX_CTL__LWM_MASK  0xff
> +#define TMFIFO_TX_CTL__HWM_SHIFT 8
> +#define TMFIFO_TX_CTL__HWM_WIDTH 8
> +#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
> +#define TMFIFO_TX_CTL__HWM_RMASK 0xff
> +#define TMFIFO_TX_CTL__HWM_MASK  0xff00
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#define TMFIFO_RX_DATA 0x0
> +
> +#define TMFIFO_RX_STS 0x8
> +#define TMFIFO_RX_STS__LENGTH 0x0001
> +#define TMFIFO_RX_STS__COUNT_SHIFT 0
> +#define TMFIFO_RX_STS__COUNT_WIDTH 9
> +#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
> +#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> +#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
> +
> +#define TMFIFO_RX_CTL 0x10
> +#define TMFIFO_RX_CTL__LENGTH 0x0001
> +#define TMFIFO_RX_CTL__LWM_SHIFT 0
> +#define TMFIFO_RX_CTL__LWM_WIDTH 8
> +#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
> +#define TMFIFO_RX_CTL__LWM_RMASK 0xff
> +#define TMFIFO_RX_CTL__LWM_MASK  0xff
> +#define TMFIFO_RX_CTL__HWM_SHIFT 8
> +#define TMFIFO_RX_CTL__HWM_WIDTH 8
> +#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
> +#define TMFIFO_RX_CTL__HWM_RMASK 0xff
> +#define TMFIFO_RX_CTL__HWM_MASK  0xff00
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#endif /* !defined(__TMFIFO_REGS_H__) */
> 

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2019-03-15 13:18     ` Matthias Brugger
  0 siblings, 0 replies; 179+ messages in thread
From: Matthias Brugger @ 2019-03-15 13:18 UTC (permalink / raw)
  To: Liming Sun, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, linux-arm-kernel



On 03/01/2019 20:17, Liming Sun wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/soc/Kconfig                |    1 +
>  drivers/soc/Makefile               |    1 +
>  drivers/soc/mellanox/Kconfig       |   18 +
>  drivers/soc/mellanox/Makefile      |    5 +
>  drivers/soc/mellanox/tmfifo.c      | 1244 ++++++++++++++++++++++++++++++++++++
>  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
>  6 files changed, 1345 insertions(+)
>  create mode 100644 drivers/soc/mellanox/Kconfig
>  create mode 100644 drivers/soc/mellanox/Makefile
>  create mode 100644 drivers/soc/mellanox/tmfifo.c
>  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
> 
[..]
> diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> new file mode 100644
> index 0000000..2975229
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo.c
[..]

> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* House-keeping timer interval. */
> +static int tmfifo_timer_interval = HZ / 10;
> +module_param(tmfifo_timer_interval, int, 0644);
> +MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
> +
> +/* Global lock. */
> +static DEFINE_MUTEX(tmfifo_lock);

Why do we need that? To synchronize between different tmfifo driver instances?

> +
> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> +module_param(tmfifo_vring_size, int, 0444);
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
> +
> +/* Struct declaration. */
> +struct tmfifo;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Structure to maintain the ring state. */
> +struct tmfifo_vring {
> +	void *va;			/* virtual address */
> +	dma_addr_t dma;			/* dma address */
> +	struct virtqueue *vq;		/* virtqueue pointer */
> +	struct vring_desc *desc;	/* current desc */
> +	struct vring_desc *desc_head;	/* current desc head */
> +	int cur_len;			/* processed len in current desc */
> +	int rem_len;			/* remaining length to be processed */
> +	int size;			/* vring size */
> +	int align;			/* vring alignment */
> +	int id;				/* vring id */
> +	int vdev_id;			/* TMFIFO_VDEV_xxx */
> +	u32 pkt_len;			/* packet total length */
> +	__virtio16 next_avail;		/* next avail desc id */
> +	struct tmfifo *fifo;		/* pointer back to the tmfifo */
> +};
> +
> +/* Interrupt types. */
> +enum {
> +	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
> +	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
> +	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
> +	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
> +	TM_IRQ_CNT
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +	TMFIFO_VRING_RX,		/* Rx ring */
> +	TMFIFO_VRING_TX,		/* Tx ring */
> +	TMFIFO_VRING_NUM
> +};
> +
> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};
> +
> +struct tmfifo_irq_info {
> +	struct tmfifo *fifo;		/* tmfifo structure */
> +	int irq;			/* interrupt number */
> +	int index;			/* array index */
> +};
> +
> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;

From what I understand we use this lock to synchronize between
tmfifo_create_vdev, tmfifo_delete_vdev and tmfifo_work_handler.

Create happens in probe and delete in remove, so we don't need a lock here.
So the only reason I can see we need this lock here is, to make sure that we
don't mess up between create a vdev and being already in the work handler. That
can only happen, if an IRQ was triggered. If we enable the IRQs after creating
the vdev, we don't need the lock at all.

> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	struct tmfifo_irq_info irq_info[TM_IRQ_CNT];	/* irq info */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +	bool is_ready;			/* ready flag */
> +	spinlock_t spin_lock;		/* spin lock */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> +
> +/* MTU setting of the virtio-net interface. */
> +#define TMFIFO_NET_MTU		1500
> +
> +/* Supported virtio-net features. */
> +#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
> +				 (1UL << VIRTIO_NET_F_STATUS) | \
> +				 (1UL << VIRTIO_NET_F_MAC))
> +
> +/* Return the available Tx buffer space. */
> +static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
> +{
> +	return ((vdev->tx_tail >= vdev->tx_head) ?
> +		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
> +		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));

Why do we need to subtract 8 from the available buffer size?

> +}
> +
> +/* Update Tx buffer pointer after pushing data. */
> +static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
> +{
> +	vdev->tx_tail += len;
> +	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;

I would have expected
vdev->tx_tail = (vdev->tx_tail + len) % TMFIFO_CONS_TX_BUF_SIZE;

But I suppose your code executes faster.
What I miss is some code to assure that no ring buffer overflow/underrun can happen.

> +}
> +
> +/* Update Tx buffer pointer after popping data. */
> +static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
> +{
> +	vdev->tx_head += len;
> +	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
> +}
> +

[...]

> +
> +/* Rx & Tx processing of a virtual queue. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)> +{
> +	struct tmfifo_vring *vring;
> +	struct tmfifo *fifo;
> +	struct vring *vr;
> +	struct virtio_device *vdev;
> +	u64 sts, data;
> +	int num_avail = 0, hdr_len, tx_reserve;
> +	void *addr;
> +	u32 len, idx;
> +	struct vring_desc *desc;
> +	unsigned long flags;
> +	struct tmfifo_vdev *cons;
> +
> +	if (!vq)
> +		return;
> +
> +	vring = (struct tmfifo_vring *)vq->priv;


You can pass struct tmfifo_vring * instead of virtqueue as function parameter,
then you don't have to do this.


> +	fifo = vring->fifo;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!fifo->vdev[vring->vdev_id])
> +		return;
> +	vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +
> +	/* Don't continue if another vring is running. */
> +	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)

How can that happen?

> +		return;
> +
> +	/* tx_reserve is used to reserved some room in FIFO for console. */
> +	if (vring->vdev_id == VIRTIO_ID_NET) {
> +		hdr_len = sizeof(struct virtio_net_hdr);
> +		tx_reserve = fifo->tx_fifo_size / 16;
> +	} else {
> +		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
> +		hdr_len = 0;
> +		tx_reserve = 1;
> +	}
> +
> +	desc = vring->desc;
> +
[...]
> +
> +/* Work handler for Rx, Tx or activity monitoring. */
> +static void tmfifo_work_handler(struct work_struct *work)
> +{
> +	int i;
> +	struct tmfifo_vdev *tm_vdev;
> +	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
> +
> +	if (!fifo->is_ready)
> +		return;
> +
> +	mutex_lock(&fifo->lock);
> +

So you don't want to queue up more work when remove is called. As is_ready is
not atomic you could deadlock here:
  remove             work_handler
 mutex_lock
                    if(!is_ready)
                   mutex_lock <- sleeps
is_ready = false
    ...
cancel_work_sync <- deadlock

> +	/* Tx. */
> +	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
> +		       fifo->irq_info[TM_TX_LWM_IRQ].irq) {
> +		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				tmfifo_virtio_rxtx(
> +					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
> +					false);
> +			}
> +		}
> +	}
> +
> +	/* Rx. */
> +	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
> +		       fifo->irq_info[TM_RX_HWM_IRQ].irq) {
> +		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				tmfifo_virtio_rxtx(
> +					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
> +					true);
> +			}
> +		}
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +}
[...]

> +
> +/* Probe the TMFIFO. */
> +static int tmfifo_probe(struct platform_device *pdev)
> +{
> +	u64 ctl;
> +	struct tmfifo *fifo;
> +	struct resource *rx_res, *tx_res;
> +	struct virtio_net_config net_config;
> +	int i, ret;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {

Can't we use devm_request_mem_region and get rid of the release_mem_region here
and in the remove function?

Regards,
Matthias

> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> +	fifo->timer.function = tmfifo_timer;
> +
> +	for (i = 0; i < TM_IRQ_CNT; i++) {
> +		fifo->irq_info[i].index = i;
> +		fifo->irq_info[i].fifo = fifo;
> +		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq_info[i].irq, tmfifo_irq_handler, 0,
> +				  "tmfifo", &fifo->irq_info[i]);
> +		if (ret) {
> +			pr_err("Unable to request irq\n");
> +			fifo->irq_info[i].irq = 0;
> +			goto err;
> +		}
> +	}
> +
> +	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
> +	if (!fifo->rx_base)
> +		goto err;
> +
> +	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
> +	if (!fifo->tx_base)
> +		goto err;
> +
> +	/* Get Tx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
> +	fifo->tx_fifo_size =
> +		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
> +	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
> +		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
> +	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
> +		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
> +	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
> +
> +	/* Get Rx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
> +	fifo->rx_fifo_size =
> +		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
> +	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
> +		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
> +	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
> +		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
> +	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
> +
> +	mutex_init(&fifo->lock);
> +
> +	/* Create the console vdev. */
> +	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +	if (ret)
> +		goto err;
> +
> +	/* Create the network vdev. */
> +	memset(&net_config, 0, sizeof(net_config));
> +	net_config.mtu = TMFIFO_NET_MTU;
> +	net_config.status = VIRTIO_NET_S_LINK_UP;
> +	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
> +	tmfifo_get_cfg_mac(net_config.mac);
> +	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
> +				 &net_config, sizeof(net_config));
> +	if (ret)
> +		goto err;
> +
> +	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
> +
> +	fifo->is_ready = true;
> +
> +	return 0;
> +
> +err:
> +	tmfifo_remove(pdev);
> +early_err:
> +	dev_err(&pdev->dev, "Probe Failed\n");
> +	return ret;
> +}
> +
> +static const struct of_device_id tmfifo_match[] = {
> +	{ .compatible = "mellanox,bf-tmfifo" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, tmfifo_match);
> +
> +static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
> +	{ "MLNXBF01", 0 },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
> +
> +static struct platform_driver tmfifo_driver = {
> +	.probe = tmfifo_probe,
> +	.remove = tmfifo_remove,
> +	.driver = {
> +		.name = "bf-tmfifo",
> +		.of_match_table = tmfifo_match,
> +		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
> +	},
> +};
> +
> +static int __init tmfifo_init(void)
> +{
> +	int ret;
> +
> +	ret = platform_driver_register(&tmfifo_driver);
> +	if (ret)
> +		pr_err("Failed to register tmfifo driver.\n");
> +
> +	return ret;
> +}
> +
> +static void __exit tmfifo_exit(void)
> +{
> +	platform_driver_unregister(&tmfifo_driver);
> +}
> +
> +module_init(tmfifo_init);
> +module_exit(tmfifo_exit);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Mellanox Technologies");
> diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
> new file mode 100644
> index 0000000..9f21764
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo_regs.h
> @@ -0,0 +1,76 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef __TMFIFO_REGS_H__
> +#define __TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +
> +#define TMFIFO_TX_DATA 0x0
> +
> +#define TMFIFO_TX_STS 0x8
> +#define TMFIFO_TX_STS__LENGTH 0x0001
> +#define TMFIFO_TX_STS__COUNT_SHIFT 0
> +#define TMFIFO_TX_STS__COUNT_WIDTH 9
> +#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
> +#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> +#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
> +
> +#define TMFIFO_TX_CTL 0x10
> +#define TMFIFO_TX_CTL__LENGTH 0x0001
> +#define TMFIFO_TX_CTL__LWM_SHIFT 0
> +#define TMFIFO_TX_CTL__LWM_WIDTH 8
> +#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
> +#define TMFIFO_TX_CTL__LWM_RMASK 0xff
> +#define TMFIFO_TX_CTL__LWM_MASK  0xff
> +#define TMFIFO_TX_CTL__HWM_SHIFT 8
> +#define TMFIFO_TX_CTL__HWM_WIDTH 8
> +#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
> +#define TMFIFO_TX_CTL__HWM_RMASK 0xff
> +#define TMFIFO_TX_CTL__HWM_MASK  0xff00
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#define TMFIFO_RX_DATA 0x0
> +
> +#define TMFIFO_RX_STS 0x8
> +#define TMFIFO_RX_STS__LENGTH 0x0001
> +#define TMFIFO_RX_STS__COUNT_SHIFT 0
> +#define TMFIFO_RX_STS__COUNT_WIDTH 9
> +#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
> +#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> +#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
> +
> +#define TMFIFO_RX_CTL 0x10
> +#define TMFIFO_RX_CTL__LENGTH 0x0001
> +#define TMFIFO_RX_CTL__LWM_SHIFT 0
> +#define TMFIFO_RX_CTL__LWM_WIDTH 8
> +#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
> +#define TMFIFO_RX_CTL__LWM_RMASK 0xff
> +#define TMFIFO_RX_CTL__LWM_MASK  0xff
> +#define TMFIFO_RX_CTL__HWM_SHIFT 8
> +#define TMFIFO_RX_CTL__HWM_WIDTH 8
> +#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
> +#define TMFIFO_RX_CTL__HWM_RMASK 0xff
> +#define TMFIFO_RX_CTL__HWM_MASK  0xff00
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#endif /* !defined(__TMFIFO_REGS_H__) */
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v11] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (47 preceding siblings ...)
  (?)
@ 2019-03-26 21:13 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-03-26 21:13 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v11->v11: rebase & resend, no new changes
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line with 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/32*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change date type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don’t use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1286 +++++++++++++++++++++++++
 4 files changed, 1361 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+          platform driver support for the TmFifo which supports console
+          and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..0a31ffa
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1286 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size (number of descriptors per ring). */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size (circular buffer backing store). */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval (used to poll the FIFO periodically). */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/*
+ * Virtual devices sharing the TM FIFO. The vdev array is indexed by the
+ * VIRTIO_ID_xxx value, so the size must cover the largest id in use.
+ */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types (also used as bit indices into mlxbf_tmfifo.pend_events). */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx). Rx is the even index, Tx the odd one. */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo,
+ *        indexed by the VIRTIO_ID_xxx value
+ * @pdev: platform device
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx fifo
+ * @tx_base: mapped register base address for the Tx fifo
+ * @rx_fifo_size: number of entries of the Rx fifo
+ * @tx_fifo_size: number of entries of the Tx fifo
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: the vring currently being serviced, per direction
+ *         ([0] = Tx, [1] = Rx)
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct platform_device *pdev;
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * mlxbf_tmfifo_msg_hdr - Union of the TmFifo message header
+ * @type: message type (VIRTIO_ID_xxx of the target virtual device)
+ * @len: payload length in network byte order
+ * @unused: reserved, not used
+ * @data: whole 64-bit header, for single-word FIFO register access
+ */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;
+		__be16 len;
+		u8 unused[5];
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	(BIT_ULL(VIRTIO_NET_F_MTU) | \
+					 BIT_ULL(VIRTIO_NET_F_STATUS) | \
+					 BIT_ULL(VIRTIO_NET_F_MAC))
+
+/* Convert a virtio_device pointer to the enclosing tmfifo vdev. */
+#define mlxbf_vdev_to_tmfifo(dev)	\
+	container_of(dev, struct mlxbf_tmfifo_vdev, vdev)
+
+/* Allocate vrings for the fifo. Returns 0 on success, -ENOMEM on failure. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		/* Allocate the coherent DMA memory backing this ring. */
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			/*
+			 * Rings allocated so far are released by the caller
+			 * via mlxbf_tmfifo_free_vrings() (see the vdev_fail
+			 * path in mlxbf_tmfifo_create_vdev()).
+			 */
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the fifo device.
+ *
+ * Delete the virtqueue (if any) before releasing the ring memory, and do
+ * it unconditionally: the original code only deleted the vq when the ring
+ * memory was still allocated, which could leak a virtqueue whose backing
+ * memory had already been released or was never set.
+ */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Remove the virtqueue first so nothing references the ring. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+
+		/* Release the DMA memory backing the ring. */
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/*
+ * Disable interrupts of the fifo device.
+ *
+ * The irq number is cleared before disable_irq() so that the work handler
+ * (which checks irq_info[].irq) stops servicing the corresponding events.
+ */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		fifo->irq_info[i].irq = 0;
+		disable_irq(irq);
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Marks the event as pending and defers the actual FIFO processing to the
+ * workqueue; the work is only scheduled when the bit was not already set.
+ */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	if (irq_info->index < MLXBF_TM_MAX_IRQ &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns NULL when no new descriptor has been made available by the
+ * driver side (next_avail has caught up with the avail index), or when
+ * the head index is out of range.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/*
+ * Release virtio descriptor.
+ *
+ * Puts the descriptor chain (identified by its head @desc) onto the used
+ * ring with the given consumed length, then advances the used index.
+ */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/*
+ * Get the total length of the descriptor chain.
+ *
+ * Walks the chain starting at @desc, following VRING_DESC_F_NEXT links,
+ * and sums the length of every descriptor in the chain.
+ */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the pending packet of the vring, if any, and reset the per-packet
+ * state (pkt_len/desc/desc_head). If no packet is in progress, the next
+ * available descriptor chain (if present) is released with its full length.
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/*
+ * Zero-initialize the virtio_net header at the start of the descriptor
+ * buffer before receiving a network packet into it.
+ * NOTE(review): @is_rx is currently unused here.
+ */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct virtio_net_hdr *net_hdr;
+
+	net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/*
+ * Get and initialize the next packet.
+ *
+ * Fetches the next available descriptor chain and records it as the current
+ * pending packet. For a network Rx packet the virtio_net header is cleared
+ * first. Returns NULL when no descriptor is available.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically kick the worker as if an Rx-HWM or Tx-LWM event had fired,
+ * so the FIFO keeps being polled even when no interrupt arrives. The work
+ * is only scheduled when at least one of the bits was newly set.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *arg)
+{
+	struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
+						 timer);
+	bool kick;
+
+	kick = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+	if (!kick)
+		kick = !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					 &fifo->pend_events);
+	if (kick)
+		schedule_work(&fifo->work);
+
+	/* Re-arm for the next polling interval. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet into the output buffer.
+ *
+ * Walks the descriptor chain and copies each segment into the console's
+ * circular Tx buffer, wrapping at the buffer end. The caller has already
+ * verified there is enough free space (see mlxbf_tmfifo_console_output)
+ * and holds fifo->spin_lock (taken in mlxbf_tmfifo_virtio_notify).
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		/* Copy in up to two pieces when wrapping the circular buffer. */
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * Drains posted console packets into the circular Tx buffer. A packet that
+ * does not fit (keeping MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE reserved) is
+ * released without copying and the loop stops, i.e. oversized output is
+ * dropped rather than blocking.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/*
+ * Get the number of available words in the Rx FIFO for receiving,
+ * i.e. the COUNT field of the Rx status register.
+ */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK,
+			 readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS));
+}
+
+/*
+ * Get the number of available words in the TmFifo for sending.
+ *
+ * A portion of the FIFO (1/MLXBF_TMFIFO_RESERVE_RATIO) is withheld from
+ * network traffic so console output is never starved; console itself only
+ * reserves one word. The result may be <= 0 when the FIFO is full.
+ */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	int tx_reserve;
+	u32 count;
+	u64 sts;
+
+	/* Reserve some room in FIFO for console messages. */
+	if (vdev_id == VIRTIO_ID_NET)
+		tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+	else
+		tx_reserve = 1;
+
+	sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+	return fifo->tx_fifo_size - tx_reserve - count;
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ *
+ * Writes a message header followed by up to @avail-1 words of buffered
+ * console data into the Tx FIFO. The amount sent is clamped to the space
+ * available so the FIFO is never overrun. The circular buffer tail is
+ * updated under fifo->spin_lock since the head is advanced from the
+ * virtio notify path.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf'. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		/* Assemble a full 64-bit word, handling the buffer wrap. */
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		/* Advance the tail by a full or final partial word. */
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/*
+ * Rx/Tx one word in the descriptor buffer.
+ *
+ * Moves one 64-bit word between the FIFO data register and the descriptor
+ * buffer at offset vring->cur_len, advancing cur_len accordingly. @len is
+ * the total usable length of this descriptor.
+ * NOTE(review): for a trailing partial word on Tx, the unused tail bytes
+ * of 'data' are not zeroed before being written to the FIFO — confirm
+ * the receiver ignores bytes beyond the header length.
+ */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/* Leftover bytes. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			config = &fifo->vdev[vdev_id]->config.net;
+			/* Drop packets larger than the configured MTU. */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			/* Anything else is treated as console data. */
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Start payload after the (virtual) header; record the active vring. */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Processes at most one FIFO word (or one header word) per call, updating
+ * the per-vring packet state machine (pkt_len/cur_len/rem_len/desc).
+ * Decrements *avail for each word consumed/produced.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * Drives mlxbf_tmfifo_rxtx_one_desc() until either the FIFO runs out of
+ * space/data or there are no more descriptors. Only one vring per
+ * direction may be in progress at a time (fifo->vring[is_rx]).
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues.
+ *
+ * Consumes the pending event bit for @irq_id and services the @queue_id
+ * vring of every registered vdev. Bails out when the event is not pending
+ * or the irq has been torn down (irq_info[].irq cleared by
+ * mlxbf_tmfifo_disable_irqs()).
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx and Tx case.
+ *
+ * Runs Tx then Rx processing for all vdevs under fifo->lock, which
+ * serializes FIFO access against vdev creation/deletion.
+ */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Rx (Receive data from the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Rx buffers are simply flagged for the worker. Console Tx is copied into
+ * the circular buffer immediately (the caller may have interrupts
+ * disabled), while other Tx just flags the worker. Always returns true.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->index & BIT(0))) {
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			return true;
+		}
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ * Any in-flight packet is released back to the used ring first.
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per pre-allocated vring (memory was allocated in
+ * mlxbf_tmfifo_alloc_vrings()); on any failure all queues created so far
+ * are torn down via mlxbf_tmfifo_virtio_del_vqs().
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ * Out-of-range requests are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ * Out-of-range requests are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Nothing to do for now. This function is needed to avoid warnings
+ * when the device is released in device_release().
+ */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * Allocates the vdev structure and its vrings, optionally copies the
+ * virtio config, and registers the virtio device. Serialized against the
+ * work handler by fifo->lock. Returns 0 or a negative errno.
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = devm_kzalloc(dev, sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/* Allocate an output buffer for the console device. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+		/*
+		 * Fail the creation on allocation error instead of silently
+		 * registering a console vdev without a Tx buffer.
+		 */
+		if (!tm_vdev->tx_buf.buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo.
+ *
+ * Unregisters the virtio device and frees its rings under fifo->lock.
+ * A non-existent vdev_id is a no-op. Always returns 0.
+ */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the EFI variable
+ * "RshimMacAddr"; fall back to the built-in default MAC when the variable
+ * is absent or has an unexpected size.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long size = ETH_ALEN;
+	u8 buf[ETH_ALEN];
+	efi_status_t rc;
+
+	rc = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size, buf);
+	if (rc != EFI_SUCCESS || size != ETH_ALEN)
+		memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);
+	else
+		ether_addr_copy(mac, buf);
+}
+
+/*
+ * Set TmFifo thresholds which are used to trigger interrupts.
+ *
+ * Also latches the hardware FIFO sizes (MAX_ENTRIES) into
+ * tx_fifo_size/rx_fifo_size for later space accounting.
+ */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the tmfifo: mark it not ready, stop the timer and interrupts,
+ * flush the worker, then delete every registered vdev. Clearing is_ready
+ * first prevents the work handler from touching the FIFO mid-teardown.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx register windows, requests the four interrupts, programs
+ * the FIFO watermarks and creates the console and network vdevs. Returns
+ * 0 on success or a negative errno.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct mlxbf_tmfifo *fifo;
+	struct resource *res;
+	int i, ret;
+
+	fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fifo->rx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fifo->tx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+
+		/*
+		 * platform_get_irq() returns a negative errno on failure;
+		 * it must not be handed to devm_request_irq().
+		 */
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "platform_get_irq failed\n");
+			return ret;
+		}
+		fifo->irq_info[i].irq = ret;
+
+		ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
+				       mlxbf_tmfifo_irq_handler, 0,
+				       "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
+				       NULL, 0);
+	if (ret)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_NET,
+				       MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				       sizeof(net_config));
+	if (ret)
+		goto fail;
+
+	/* Start the background polling timer. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return ret;
+}
+
+/* Device remove function. Tears down the whole tmfifo; always returns 0. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+
+	return 0;
+}
+
+/* ACPI match table: the BlueField TmFifo is described as MLNXBF01. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v12] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (48 preceding siblings ...)
  (?)
@ 2019-03-28 19:56 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-03-28 19:56 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v11->v12:
    Fixed the two unsolved comments from v11.
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Done. Seems not hard.
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      Yes, understand the comment now. The tmfifo is fixed, but the
      vdev is dynamic. Use kzalloc() instead, and free the device
      in the release callback which is the right place for it.
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line with 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/32*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change data type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don’t use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1291 +++++++++++++++++++++++++
 4 files changed, 1366 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+          platform driver support for the TmFifo which supports console
+          and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+/* Tx FIFO registers (relative to the Tx base). */
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+/* Rx FIFO registers (relative to the Rx base); layout mirrors the Tx side. */
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..2bc03c3
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1291 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * struct mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types; each value is a bit index into mlxbf_tmfifo.pend_events. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,	/* Rx FIFO low-water mark */
+	MLXBF_TM_RX_HWM_IRQ,	/* Rx FIFO high-water mark */
+	MLXBF_TM_TX_LWM_IRQ,	/* Tx FIFO low-water mark */
+	MLXBF_TM_TX_HWM_IRQ,	/* Tx FIFO high-water mark */
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx); used as index into mlxbf_tmfifo_vdev.vrings. */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * struct mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * struct mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number (0 when the interrupt is not in use)
+ * @index: index into the interrupt array (MLXBF_TM_xxx_IRQ)
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * struct mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @pdev: platform device
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx fifo
+ * @tx_base: mapped register base address for the Tx fifo
+ * @rx_fifo_size: number of entries of the Rx fifo
+ * @tx_fifo_size: number of entries of the Tx fifo
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: vring currently being processed per direction, indexed by is_rx
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct platform_device *pdev;
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * union mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
+ * @type: message type (VIRTIO_ID_xxx)
+ * @len: payload length in network byte order
+ * @unused: reserved padding; fills the header out to 8 bytes
+ * @data: the whole header as a single 64-bit word for FIFO read/write
+ */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;
+		__be16 len;
+		u8 unused[5];
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length; used to sanity-check received packet length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	(BIT_ULL(VIRTIO_NET_F_MTU) | \
+					 BIT_ULL(VIRTIO_NET_F_STATUS) | \
+					 BIT_ULL(VIRTIO_NET_F_MAC))
+
+/* Convert a virtio_device pointer to the enclosing tmfifo vdev. */
+#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
+
+/* Allocate coherent DMA memory for all vrings of the fifo device. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			/*
+			 * Rings allocated so far keep their ->va set, so
+			 * the caller can release them with
+			 * mlxbf_tmfifo_free_vrings().
+			 */
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free the vring DMA memory (and any created virtqueue) of the device. */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		/* ->va is only non-NULL once the allocation succeeded. */
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Disable interrupts of the fifo device. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		/* Clear the slot so the work handler stops using this irq. */
+		fifo->irq_info[i].irq = 0;
+		/*
+		 * NOTE(review): irq can be 0 here if devm_request_irq()
+		 * failed during probe; confirm disable_irq(0) is harmless
+		 * in that case or re-add an "if (irq)" guard.
+		 */
+		disable_irq(irq);
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * Only record the event in 'pend_events' and kick the work queue; the
+ * actual FIFO processing is deferred to mlxbf_tmfifo_work_handler().
+ */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	/* Schedule work only if this event was not already pending. */
+	if (irq_info->index < MLXBF_TM_MAX_IRQ &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when
+ * the driver has consumed everything posted on the avail ring.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	/* Nothing new posted by the virtio driver. */
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	/* Consume this avail entry. */
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/* Release virtio descriptor: post it on the used ring with length @len. */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Get the total length of the descriptor chain starting at @desc. */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 len = 0, idx;
+
+	/* Walk the chain until a descriptor without the NEXT flag. */
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the vring's partially-processed packet if there is one;
+ * otherwise release the next available packet, then clear the pending
+ * packet state.
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		/* A packet is in flight; use its recorded head and length. */
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/*
+ * Zero-initialize the virtio_net header at the front of an Rx buffer.
+ * NOTE(review): @is_rx is unused here; the caller only invokes this
+ * for Rx network descriptors.
+ */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct virtio_net_hdr *net_hdr;
+
+	net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/*
+ * Get the next packet from the vring, record it as the pending one and,
+ * for Rx network packets, zero-initialize the virtio_net header.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	/* Remember the head for later release on completion. */
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically raises the Rx/Tx events and schedules the work handler,
+ * so FIFO processing also happens on a timer basis in addition to
+ * interrupts. The timer re-arms itself.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *arg)
+{
+	struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
+						 timer);
+	int more;
+
+	/* Only schedule work if at least one event was not already pending. */
+	more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) ||
+		    !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (more)
+		schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet into the circular output buffer.
+ * The caller has already verified that the buffer has enough free space.
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		/* Copy in up to two segments if the data wraps around. */
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		/* Follow the descriptor chain. */
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * If the buffer does not have enough room for a packet (keeping the
+ * reserved space), the packet is released back to the ring without
+ * being copied (i.e. dropped) and processing stops.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Get the number of available words in Rx FIFO for receiving. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 sts;
+
+	/* The COUNT field of the status register holds the word count. */
+	sts = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);
+}
+
+/*
+ * Get the number of available words in the TmFifo for sending.
+ * The result can be <= 0 when the FIFO is (nearly) full; callers
+ * must check for that.
+ */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	int tx_reserve;
+	u32 count;
+	u64 sts;
+
+	/* Reserve some room in FIFO for console messages. */
+	if (vdev_id == VIRTIO_ID_NET)
+		tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+	else
+		tx_reserve = 1;
+
+	sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+	return fifo->tx_fifo_size - tx_reserve - count;
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ *
+ * @avail is the number of free 8-byte words in the Tx FIFO: one word is
+ * used for the message header, the rest for payload. The spin lock
+ * protects only 'cons->tx_buf', not the FIFO writes themselves.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf'. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		/* Build a full 64-bit word, wrapping around if necessary. */
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		/*
+		 * The last word may carry bytes beyond 'size'; the header
+		 * length tells the receiver how much is valid.
+		 */
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/*
+ * Rx/Tx one word (8 bytes) of the descriptor buffer.
+ * @len is the number of bytes to transfer for this descriptor;
+ * vring->cur_len tracks how far the transfer has progressed.
+ */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/* Leftover bytes. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ *
+ * The header occupies one 8-byte FIFO word; the caller accounts for it
+ * by decrementing its 'avail' counter.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			config = &fifo->vdev[vdev_id]->config.net;
+			/* Drop oversized packets. */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Start the payload transfer right after the virtio header (if any). */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Decrements *avail by one FIFO word per transfer (header or data).
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * Moves data between the vring and the FIFO, one 8-byte word at a time,
+ * until the FIFO has no more space/data or the vring has no more
+ * descriptors to process.
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues for all virtual devices.
+ *
+ * Runs only when the corresponding event bit is pending and the IRQ is
+ * still registered (irq == 0 marks a disabled/failed interrupt).
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	/* Serve the selected queue of every registered vdev. */
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx and Tx case.
+ *
+ * Deferred processing scheduled from the IRQ handler, the timer, or
+ * virtio notify; serialized against vdev create/delete via fifo->lock.
+ */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Rx (Receive data from the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Sets the matching pending-event bit and kicks the worker; if the bit was
+ * already set, the worker is known to be scheduled and no re-kick is needed.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->index & BIT(0))) {
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			return true;
+		}
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Get the array of feature bits for this device (virtio config op). */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->features;
+}
+
+/*
+ * Confirm device features to use (virtio config op). Records whatever
+ * feature set the virtio core negotiated; always succeeds.
+ */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Drops any packet still in flight on each vring before deleting the
+ * virtqueue; the vring DMA memory itself is freed separately.
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues (virtio config op).
+ *
+ * Builds one virtqueue per pre-allocated vring; the 'ctx' and 'desc'
+ * arguments are accepted for interface compatibility but unused here.
+ * On any failure all queues created so far are torn down.
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte (virtio config op). */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte (virtio config op). */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device: only clears the status byte for now (virtio config op). */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read the value of a configuration field (virtio config op).
+ * Out-of-range requests are silently ignored, leaving 'buf' untouched.
+ */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field (virtio config op).
+ * Out-of-range requests are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Device release callback: frees the kzalloc'ed vdev once the last
+ * reference to its embedded struct device is dropped.
+ */
+static void tmfifo_virtio_dev_release(struct device *device)
+{
+	struct virtio_device *vdev =
+			container_of(device, struct virtio_device, dev);
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	kfree(tm_vdev);
+}
+
+/* Virtio config operations shared by all vdevs on this fifo. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev type in a tmfifo.
+ *
+ * Allocates and registers one virtio device of type 'vdev_id' on the
+ * fifo, copying 'size' bytes of 'config' (if any) into the vdev config
+ * space. Returns 0 on success or a negative errno.
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	/* Only one vdev per id is allowed. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	/* Freed in tmfifo_virtio_dev_release() via put_device(). */
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/* Allocate an output buffer for the console device. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+		/*
+		 * Fix: the allocation result was previously unchecked,
+		 * which could lead to a NULL dereference in the console
+		 * Tx path on allocation failure.
+		 */
+		if (!tm_vdev->tx_buf.buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	reg_dev = tm_vdev;
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+	/* After register_virtio_device() the release callback owns the free. */
+	if (reg_dev)
+		put_device(&tm_vdev->vdev.dev);
+	else
+		kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo.
+ *
+ * Safe to call for an id that was never created; always returns 0.
+ */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from efi variable.
+ * Falls back to the built-in default MAC if the variable is absent
+ * or has an unexpected size.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long size = ETH_ALEN;
+	efi_status_t status;
+	u8 buf[ETH_ALEN];
+
+	status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
+				  buf);
+	if (status == EFI_SUCCESS && size == ETH_ALEN)
+		ether_addr_copy(mac, buf);
+	else
+		memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);
+}
+
+/*
+ * Set TmFifo thresholds which are used to trigger interrupts.
+ *
+ * Tx: LWM at half the FIFO, HWM at size-1. Rx: LWM 0, HWM 1 so any
+ * received word raises the high-watermark interrupt.
+ */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the fifo: stop the timer, IRQs and worker, then remove
+ * every registered vdev. Used on probe failure and on remove.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx register windows, requests the four interrupts,
+ * programs the FIFO watermarks and creates the console and network
+ * virtio devices. Returns 0 on success or a negative errno.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct mlxbf_tmfifo *fifo;
+	struct resource *res;
+	int i, ret;
+
+	fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fifo->rx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fifo->tx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+		/*
+		 * Fix: platform_get_irq() returns a negative errno on
+		 * failure; requesting such a value was previously
+		 * unchecked.
+		 */
+		if (fifo->irq_info[i].irq < 0) {
+			ret = fifo->irq_info[i].irq;
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+		ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
+				       mlxbf_tmfifo_irq_handler, 0,
+				       "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
+				       NULL, 0);
+	if (ret)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_NET,
+				       MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				       sizeof(net_config));
+	if (ret)
+		goto fail;
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return ret;
+}
+
+/* Device remove function: full teardown via mlxbf_tmfifo_cleanup(). */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+
+	return 0;
+}
+
+/* ACPI match table; the SoC firmware exposes the fifo as MLNXBF01. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (49 preceding siblings ...)
  (?)
@ 2019-04-04 19:36 ` Liming Sun
  2019-04-05 15:44   ` Andy Shevchenko
  -1 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-04-04 19:36 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v12->v13:
    Rebase and resubmit (no new changes).
v11->v12:
    Fixed the two unsolved comments from v11.
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Done. Seems not hard.
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      Yes, understand the comment now. The tmfifo is fixed, but the
      vdev is dynamic. Use kzalloc() instead, and free the device
      in the release callback which is the right place for it.
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line with 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/23*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change date type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don't use declarations in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1291 +++++++++++++++++++++++++
 4 files changed, 1366 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+	  platform driver support for the TmFifo which supports console
+	  and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..2bc03c3
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1291 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * struct mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet (NULL when idle)
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types; also used as bit indices into mlxbf_tmfifo.pend_events. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx); index into mlxbf_tmfifo_vdev.vrings. */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * struct mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * struct mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number (0 when the interrupt is disabled/unused)
+ * @index: index into the interrupt array
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * struct mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @pdev: platform device
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx fifo
+ * @tx_base: mapped register base address for the Tx fifo
+ * @rx_fifo_size: number of entries of the Rx fifo
+ * @tx_fifo_size: number of entries of the Tx fifo
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: vring currently being served, indexed by Rx(1)/Tx(0) direction
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct platform_device *pdev;
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * union mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
+ * @type: message type
+ * @len: payload length (big endian)
+ * @unused: reserved, pads the header to 8 bytes
+ * @data: 64-bit view of the whole header for single-word FIFO access
+ */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;
+		__be16 len;
+		u8 unused[5];
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES	(BIT_ULL(VIRTIO_NET_F_MTU) | \
+					 BIT_ULL(VIRTIO_NET_F_STATUS) | \
+					 BIT_ULL(VIRTIO_NET_F_MAC))
+
+#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
+
+/*
+ * Allocate vrings for the fifo.
+ *
+ * DMA-coherent memory is allocated against the parent (platform) device.
+ * On partial failure, already-allocated rings are left for the caller to
+ * free via mlxbf_tmfifo_free_vrings().
+ */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/*
+ * Free vrings of the fifo device.
+ *
+ * Safe to call on partially-allocated vdevs: rings with a NULL va are
+ * skipped, and the virtqueue is deleted only if one was created.
+ */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Disable interrupts of the fifo device and clear the stored IRQ numbers. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		fifo->irq_info[i].irq = 0;
+		/*
+		 * NOTE(review): irq is not validated here; if an entry was
+		 * already cleared to 0 (see probe error path), this calls
+		 * disable_irq(0) — confirm callers guarantee valid IRQs.
+		 */
+		disable_irq(irq);
+	}
+}
+
+/*
+ * Interrupt handler. Marks the event pending and defers all FIFO
+ * processing to the workqueue; test_and_set_bit() dedupes repeated
+ * interrupts for the same event.
+ */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	if (irq_info->index < MLXBF_TM_MAX_IRQ &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Get the next packet descriptor from the vring's available ring, or
+ * NULL if no new buffer has been posted. Advances the driver-private
+ * next_avail cursor on success.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	/* Nothing new posted by the driver side. */
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/*
+ * Release a virtio descriptor chain back to the used ring with the
+ * given total length.
+ */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/*
+ * Get the total length of the descriptor chain by walking the
+ * VRING_DESC_F_NEXT links starting at @desc.
+ */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the in-flight packet of the vring (if one is pending), or the
+ * next available packet, back to the used ring, then clear the vring's
+ * per-packet state.
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		/* A packet is in progress; use its recorded length. */
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/*
+ * Zero the virtio_net header at the start of the descriptor buffer.
+ * NOTE(review): the is_rx parameter is currently unused in the body —
+ * confirm whether it is needed or can be dropped.
+ */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct virtio_net_hdr *net_hdr;
+
+	net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/*
+ * Get and initialize the next packet: fetch the next descriptor,
+ * zero the virtio-net header for network Rx, and record it as the
+ * current packet head of the vring.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/*
+ * House-keeping timer. Periodically sets both the Rx and Tx pending
+ * events and kicks the worker, as a fallback in case an interrupt is
+ * missed, then re-arms itself.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *arg)
+{
+	struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
+						 timer);
+	int more;
+
+	more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) ||
+		    !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (more)
+		schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet (a whole descriptor chain) into the circular
+ * Tx output buffer. The caller (mlxbf_tmfifo_console_output) has already
+ * verified that enough space is available.
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		/* Copy in up to two segments to handle buffer wrap-around. */
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer. A packet that does not fit
+ * (keeping MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE reserved) is released back
+ * to the used ring without being copied, i.e. its data is dropped.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Get the number of available words in Rx FIFO for receiving. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 sts;
+
+	/* COUNT field of the Rx status register holds the word count. */
+	sts = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);
+}
+
+/*
+ * Get the number of available words in the TmFifo for sending.
+ * May return a negative value when the FIFO is nearly full, which
+ * callers treat as "no space" (see mlxbf_tmfifo_rxtx).
+ */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	int tx_reserve;
+	u32 count;
+	u64 sts;
+
+	/* Reserve some room in FIFO for console messages. */
+	if (vdev_id == VIRTIO_ID_NET)
+		tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+	else
+		tx_reserve = 1;
+
+	sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+	return fifo->tx_fifo_size - tx_reserve - count;
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ * Writes one header word followed by the payload, truncating the batch
+ * to the available FIFO space. The circular buffer indices are updated
+ * under fifo->spin_lock.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf'. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		/* Assemble one 64-bit word, handling buffer wrap-around. */
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/* Rx/Tx one word (8 bytes) between the descriptor buffer and the FIFO. */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/*
+		 * Leftover bytes.
+		 * NOTE(review): on the Tx path only (len - cur_len) bytes of
+		 * 'data' are initialized before the full 64-bit writeq below,
+		 * so the remaining bytes are indeterminate stack content —
+		 * consider zero-initializing 'data'.
+		 */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			/*
+			 * NOTE(review): config->mtu is virtio config space
+			 * data read directly here — confirm endianness
+			 * handling (virtio16) is correct on this platform.
+			 */
+			config = &fifo->vdev[vdev_id]->config.net;
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			/* Anything else is treated as console data. */
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Start the payload after the (possibly zero-length) virtio header. */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Decrements *avail for each FIFO word consumed/produced.
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		/* Resume the packet that is already in progress. */
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue. Loops over descriptors until either
+ * the FIFO runs out of space/data or the vring has no more work.
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues for all vdevs, if the corresponding event is
+ * pending and its IRQ is still registered.
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx and Tx case. Serializes all FIFO processing
+ * under fifo->lock; Tx is drained before Rx.
+ */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Rx (Receive data from the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ * Console Tx is served inline (it may be called with interrupts
+ * disabled); everything else is deferred to the worker.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->index & BIT(0))) {
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			return true;
+		}
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Get the feature bits for this device (set in mlxbf_tmfifo_create_vdev). */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use; accepts whatever the core negotiated. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs(). Any in-flight packet on a vring
+ * is returned to the used ring before the virtqueue is deleted.
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues on top of the pre-allocated
+ * vring memory (see mlxbf_tmfifo_alloc_vrings). On any failure, all
+ * virtqueues created so far are torn down. Returns 0 or a negative errno.
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Stash the vring in vq->priv for the notify callback. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the virtio status byte cached in the tmfifo vdev. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the virtio status byte; only cached, no hardware side effect here. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now; only clears the status byte. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read the value of a configuration field. Out-of-range accesses are
+ * silently ignored (the buffer is left untouched).
+ */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field. Out-of-range accesses are
+ * silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Device release callback; frees the tmfifo vdev allocated in create_vdev. */
+static void tmfifo_virtio_dev_release(struct device *device)
+{
+	struct virtio_device *vdev =
+			container_of(device, struct virtio_device, dev);
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	kfree(tm_vdev);
+}
+
+/* Virtio config operations implemented by this transport. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create one vdev (console or network) in a tmfifo: allocate the tmfifo
+ * vdev, copy its config, allocate vrings, and register the virtio
+ * device. Returns 0 or a negative errno. After register_virtio_device()
+ * has been tried, cleanup goes through put_device() so the release
+ * callback frees tm_vdev.
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/*
+	 * Allocate an output buffer for the console device.
+	 * NOTE(review): the devm_kmalloc() result is not checked here;
+	 * mlxbf_tmfifo_console_tx() tolerates a NULL tx_buf.buf, but the
+	 * console would then silently drop output — consider failing here.
+	 */
+	if (vdev_id == VIRTIO_ID_CONSOLE)
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	reg_dev = tm_vdev;
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+	if (reg_dev)
+		put_device(&tm_vdev->vdev.dev);
+	else
+		kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev type from a tmfifo: unregister the virtio device and free
+ * its vrings. Safe to call for an id that was never created. Always
+ * returns 0.
+ */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the "RshimMacAddr" EFI
+ * variable; fall back to the built-in default on any failure.
+ * NOTE(review): assumes efi.get_variable is available at runtime on
+ * this platform — confirm for non-EFI boots.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long size = ETH_ALEN;
+	efi_status_t status;
+	u8 buf[ETH_ALEN];
+
+	status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
+				  buf);
+	if (status == EFI_SUCCESS && size == ETH_ALEN)
+		ether_addr_copy(mac, buf);
+	else
+		memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);
+}
+
+/* Set TmFifo thresholds which are used to trigger interrupts. */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the tmfifo: mark it not ready, stop the timer and IRQs,
+ * flush the worker, then delete all vdevs.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO: map the Rx/Tx register blocks, request IRQs, program
+ * the watermarks, create the console and network vdevs, and start the
+ * house-keeping timer. Returns 0 or a negative errno.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct mlxbf_tmfifo *fifo;
+	struct resource *res;
+	int i, ret;
+
+	fifo = devm_kzalloc(&pdev->dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fifo->rx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fifo->tx_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		/*
+		 * NOTE(review): platform_get_irq() can return a negative
+		 * errno, which is passed to devm_request_irq() unchecked —
+		 * consider validating it before requesting.
+		 */
+		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+		ret = devm_request_irq(&pdev->dev, fifo->irq_info[i].irq,
+				       mlxbf_tmfifo_irq_handler, 0,
+				       "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			dev_err(&pdev->dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return ret;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
+				       NULL, 0);
+	if (ret)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_NET,
+				       MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				       sizeof(net_config));
+	if (ret)
+		goto fail;
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return ret;
+}
+
+/* Device remove function; tears everything down via mlxbf_tmfifo_cleanup(). */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+
+	return 0;
+}
+
+/* ACPI match table for the BlueField TmFifo device. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+/* Platform driver registration. */
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-04 19:36 ` [PATCH v13] " Liming Sun
@ 2019-04-05 15:44   ` Andy Shevchenko
  2019-04-05 19:10     ` Liming Sun
  0 siblings, 1 reply; 179+ messages in thread
From: Andy Shevchenko @ 2019-04-05 15:44 UTC (permalink / raw)
  To: Liming Sun
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

On Thu, Apr 4, 2019 at 10:36 PM Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.

Thanks for an update. Almost good.
My comments below.

Meanwhile I pushed this to my review and testing queue, thanks!

> +#include <linux/acpi.h>
> +#include <linux/bitfield.h>
> +#include <linux/circ_buf.h>
> +#include <linux/efi.h>
> +#include <linux/irq.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/types.h>

Perhaps blank line here. Would be more clear that this is utilizing
virtio framework.

> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>

> +/**
> + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> + * @type: message type
> + * @len: payload length
> + * @u: 64-bit union data
> + */
> +union mlxbf_tmfifo_msg_hdr {
> +       struct {
> +               u8 type;
> +               __be16 len;
> +               u8 unused[5];
> +       } __packed;
> +       u64 data;

I'm not sure I understand how you can distinguish which field of the union to use.
Isn't some type/discriminator field missing here?

> +};

> +static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {

> +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};

This should be two lines.

> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES      (BIT_ULL(VIRTIO_NET_F_MTU) | \
> +                                        BIT_ULL(VIRTIO_NET_F_STATUS) | \
> +                                        BIT_ULL(VIRTIO_NET_F_MAC))

Better to write as

#define FOO \
(BIT(x) | BIT(y) ...)

I think I told this earlier?

> +/* Allocate vrings for the fifo. */

fifo -> FIFO (and check all occurrences)

> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       struct device *dev;
> +       dma_addr_t dma;
> +       int i, size;
> +       void *va;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +               vring->fifo = fifo;
> +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> +               vring->align = SMP_CACHE_BYTES;
> +               vring->index = i;
> +               vring->vdev_id = tm_vdev->vdev.id.device;
> +               dev = &tm_vdev->vdev.dev;
> +
> +               size = vring_size(vring->num, vring->align);
> +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> +               if (!va) {

> +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");

I don't see how this will free the allocated entries.
I think I told about this either.

> +                       return -ENOMEM;
> +               }
> +
> +               vring->va = va;
> +               vring->dma = dma;
> +       }
> +
> +       return 0;
> +}

> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> +{

> +       struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
> +                                                timer);

One line would be still good enough.

> +       int more;
> +
> +       more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) ||
> +                   !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> +
> +       if (more)
> +               schedule_work(&fifo->work);
> +
> +       mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
> +}

> +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> +                                 buf);
> +       if (status == EFI_SUCCESS && size == ETH_ALEN)
> +               ether_addr_copy(mac, buf);
> +       else

> +               memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);

ether_addr_copy() as well.

> +}

> +       fifo->pdev = pdev;

Do you really need to keep pdev there? Isn't struct device pointer enough?


> +       /* Create the console vdev. */
> +       ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
> +                                      NULL, 0);

If you define temporary variable
  struct device *dev = &pdev->dev;
these lines can be merged into one.

> +       if (ret)
> +               goto fail;

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-05 15:44   ` Andy Shevchenko
@ 2019-04-05 19:10     ` Liming Sun
  2019-04-07  2:05       ` Liming Sun
  0 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-04-05 19:10 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy! I'll address the comments in v14.

Some question for the comment below:

> > +               size = vring_size(vring->num, vring->align);
> > +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> > +               if (!va) {
> 
> > +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> > I don't see how this will free the allocated entries.
> I think I told about this either.

When an error is returned, all the allocated entries will be released in the
caller context by calling mlxbf_tmfifo_free_vrings(), like the logic below.
Or do you prefer releasing the entries in mlxbf_tmfifo_alloc_vrings() instead?

1073         if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
1074                 dev_err(dev, "unable to allocate vring\n");
1075                 ret = -ENOMEM;
1076                 goto vdev_fail;
1077         }
...
1097 vdev_fail:
1098         mlxbf_tmfifo_free_vrings(fifo, tm_vdev);

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Friday, April 5, 2019 11:44 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Thu, Apr 4, 2019 at 10:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
> 
> Thanks for an update. Almost good.
> My comments below.
> 
> Meanwhile I pushed this to my review and testing queue, thanks!
> 
> > +#include <linux/acpi.h>
> > +#include <linux/bitfield.h>
> > +#include <linux/circ_buf.h>
> > +#include <linux/efi.h>
> > +#include <linux/irq.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/types.h>
> 
> Perhaps blank line here. Would be more clear that this is utilizing
> virtio framework.
> 
> > +#include <linux/virtio_config.h>
> > +#include <linux/virtio_console.h>
> > +#include <linux/virtio_ids.h>
> > +#include <linux/virtio_net.h>
> > +#include <linux/virtio_ring.h>
> 
> > +/**
> > + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> > + * @type: message type
> > + * @len: payload length
> > + * @u: 64-bit union data
> > + */
> > +union mlxbf_tmfifo_msg_hdr {
> > +       struct {
> > +               u8 type;
> > +               __be16 len;
> > +               u8 unused[5];
> > +       } __packed;
> > +       u64 data;
> 
> I'm not sure I understand how you can distinguish which field of union to use?
> Isn't here some type missed?
> 
> > +};
> 
> > +static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
> 
> > +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> 
> This should be two lines.
> 
> > +/* Supported virtio-net features. */
> > +#define MLXBF_TMFIFO_NET_FEATURES      (BIT_ULL(VIRTIO_NET_F_MTU) | \
> > +                                        BIT_ULL(VIRTIO_NET_F_STATUS) | \
> > +                                        BIT_ULL(VIRTIO_NET_F_MAC))
> 
> Better to write as
> 
> #define FOO \
> (BIT(x) | BIT(y) ...)
> 
> I think I told this earlier?
> 
> > +/* Allocate vrings for the fifo. */
> 
> fifo -> FIFO (and check all occurrences)
> 
> > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       struct device *dev;
> > +       dma_addr_t dma;
> > +       int i, size;
> > +       void *va;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > +               vring = &tm_vdev->vrings[i];
> > +               vring->fifo = fifo;
> > +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> > +               vring->align = SMP_CACHE_BYTES;
> > +               vring->index = i;
> > +               vring->vdev_id = tm_vdev->vdev.id.device;
> > +               dev = &tm_vdev->vdev.dev;
> > +
> > +               size = vring_size(vring->num, vring->align);
> > +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> > +               if (!va) {
> 
> > +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> 
> I don't see how this will free the allocated entries.
> I think I told about this either.
> 
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               vring->va = va;
> > +               vring->dma = dma;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> > +/* House-keeping timer. */
> > +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> > +{
> 
> > +       struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
> > +                                                timer);
> 
> One line would be still good enough.
> 
> > +       int more;
> > +
> > +       more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) ||
> > +                   !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> > +
> > +       if (more)
> > +               schedule_work(&fifo->work);
> > +
> > +       mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
> > +}
> 
> > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > +                                 buf);
> > +       if (status == EFI_SUCCESS && size == ETH_ALEN)
> > +               ether_addr_copy(mac, buf);
> > +       else
> 
> > +               memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);
> 
> ether_addr_copy() as well.
> 
> > +}
> 
> > +       fifo->pdev = pdev;
> 
> Do you really need to keep pdev there? Isn't struct device pointer enough?
> 
> 
> > +       /* Create the console vdev. */
> > +       ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
> > +                                      NULL, 0);
> 
> If you define temporary variable
>   struct device *dev = &pdev->dev;
> these lines can be merged into one.
> 
> > +       if (ret)
> > +               goto fail;
> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v14] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (50 preceding siblings ...)
  (?)
@ 2019-04-07  2:03 ` Liming Sun
  2019-04-11 14:09   ` Andy Shevchenko
  -1 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-04-07  2:03 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v13->v14:
    Fixes for comments from Andy:
    - Add a blank line to separate the virtio header files;
    - Update the comment for 'union mlxbf_tmfifo_msg_hdr' to be
      more clear how this union is used;
    - Update the 'mlxbf_tmfifo_net_default_mac[ETH_ALEN]' definition
      to be two lines;
    - Reformat macro MLXBF_TMFIFO_NET_FEATURES to put the definition
      on a separate line;
    - Update all 'fifo' to 'FIFO' in the comments;
    - Update mlxbf_tmfifo_alloc_vrings() to specifically release the
      allocated entries in case of failures, so the logic looks more
      clear. In the caller function the mlxbf_tmfifo_free_vrings()
      might be called again in case of other failures, which is ok
      since the 'va' pointer will be set to NULL once released;
    - Update mlxbf_tmfifo_timer() to change the first statement to
      one line;
    - Update one memcpy() to ether_addr_copy() in
      mlxbf_tmfifo_get_cfg_mac();
    - Remove 'fifo->pdev' since it is really not needed;
    - Define temporary variable to update the mlxbf_tmfifo_create_vdev()
      statement into single line.
    New changes by Liming:
    - Reorder the logic a little bit in mlxbf_tmfifo_timer(). Previously
      it has logic like "!a || !b" while the '!b' will not be evaluated
      if '!a' is true. It was changed to this way during review, but is
      actually not the desired behavior since both bits need to be
      tested/set in fifo->pend_events. This issue was found during
      verification which caused extra delays for Tx packets.
v12->v13:
    Rebase and resubmit (no new changes).
v11->v12:
    Fixed the two unsolved comments from v11.
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Done. Seems not hard.
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      Yes, understand the comment now. The tmfifo is fixed, but the
      vdev is dynamic. Use kzalloc() instead, and free the device
      in the release callback which is the right place for it.
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line with 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/23*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change data type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don't use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1294 +++++++++++++++++++++++++
 4 files changed, 1369 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+	  platform driver support for the TmFifo which supports console
+	  and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..d9b7008
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1294 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx FIFO
+ * @tx_base: mapped register base address for the Tx FIFO
+ * @rx_fifo_size: number of entries of the Rx FIFO
+ * @tx_fifo_size: number of entries of the Tx FIFO
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: Tx/Rx ring
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
+ * @type: message type
+ * @len: payload length
+ * @data: 64-bit data used to write the message header into the TmFifo register.
+ *
+ * This message header is a union of struct and u64 data. The 'struct' has
+ * type and length field which are used to encode & decode the message. The
+ * 'data' field is used to read/write the message header from/to the FIFO.
+ */
+union mlxbf_tmfifo_msg_hdr {
+	struct {
+		u8 type;
+		__be16 len;
+		u8 unused[5];
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01
+};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES \
+	(BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_STATUS) | \
+	 BIT_ULL(VIRTIO_NET_F_MAC))
+
+#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
+
+/* Free vrings of the FIFO device. */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Allocate vrings for the FIFO. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Disable interrupts of the FIFO device. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		fifo->irq_info[i].irq = 0;
+		disable_irq(irq);
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	if (irq_info->index < MLXBF_TM_MAX_IRQ &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Get the next packet descriptor from the vring. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/* Release virtio descriptor. */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Sum up the buffer lengths across the whole descriptor chain. */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 total = 0;
+
+	/* Walk the chain until a descriptor without the NEXT flag. */
+	while (desc) {
+		total += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		desc = &vr->desc[virtio16_to_cpu(vdev, desc->next)];
+	}
+
+	return total;
+}
+
+/*
+ * Release the in-flight packet of this vring, if any.
+ *
+ * If a packet is partially processed ('desc_head' set), release it with the
+ * cached 'pkt_len'. Otherwise, pull the next descriptor from the avail ring
+ * and release it immediately (used to drop a pending buffer). Resets the
+ * vring packet state in all cases.
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		if (desc_head)
+			len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/*
+ * Zero-fill the virtio_net_hdr at the start of an Rx network buffer.
+ * The FIFO transport carries no offload metadata, so the header is left
+ * all-zeros. 'is_rx' is currently unused.
+ */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct virtio_net_hdr *net_hdr;
+
+	net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/* Fetch the next packet head and set up the vring packet state. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *head = mlxbf_tmfifo_get_next_desc(vring);
+
+	/* Rx network buffers start with a virtio_net_hdr that is cleared. */
+	if (is_rx && head && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, head, is_rx);
+
+	vring->desc_head = head;
+	vring->desc = head;
+
+	return head;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically sets both Rx and Tx pending events and kicks the worker,
+ * so progress is made even if a FIFO interrupt is missed. Re-arms itself.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *t)
+{
+	struct mlxbf_tmfifo *fifo = container_of(t, struct mlxbf_tmfifo, timer);
+	int rx, tx;
+
+	/* Only schedule work if at least one bit was newly set. */
+	rx = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+	tx = !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (rx || tx)
+		schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet into the output buffer.
+ *
+ * Walks the descriptor chain and appends each buffer to the circular
+ * 'tx_buf', wrapping at the buffer end. The caller has already verified
+ * that enough space is available (see mlxbf_tmfifo_console_output).
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		/* Contiguous space until the physical end of the buffer. */
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			/* Wrap: split the copy across the buffer boundary. */
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * Drains packets from the Tx vring into 'tx_buf'. A packet that does not
+ * fit (keeping MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE in reserve) is dropped by
+ * releasing its descriptor without copying, and draining stops.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Number of words currently readable from the Rx FIFO. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 status = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, status);
+}
+
+/*
+ * Get the number of available words in the TmFifo for sending.
+ *
+ * For network traffic a fraction of the FIFO is reserved so console
+ * messages can always make progress. The result may be <= 0 when the
+ * FIFO is (nearly) full; callers treat that as "no space".
+ */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	int tx_reserve;
+	u32 count;
+	u64 sts;
+
+	/* Reserve some room in FIFO for console messages. */
+	if (vdev_id == VIRTIO_ID_NET)
+		tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+	else
+		tx_reserve = 1;
+
+	sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+	return fifo->tx_fifo_size - tx_reserve - count;
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ *
+ * @fifo: the TmFifo state
+ * @avail: number of 8-byte words currently free in the Tx FIFO
+ *
+ * Writes one message header word followed by the buffered console data,
+ * 8 bytes at a time. The amount sent is clamped to 'avail' words so the
+ * FIFO is never overrun.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	union mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.data = 0;
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf'. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		/* Contiguous bytes until the physical end of the buffer. */
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			/* Wrap: assemble the word from the tail and head. */
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			/* Last partial word: advance tail by what was sent. */
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/*
+ * Rx/Tx one word (8 bytes) in the descriptor buffer.
+ *
+ * @vring: vring being processed
+ * @desc: current descriptor in the chain
+ * @is_rx: true to copy FIFO -> buffer, false for buffer -> FIFO
+ * @len: total payload length of this descriptor (clamped by the caller)
+ *
+ * Advances vring->cur_len by up to 8 bytes per call.
+ */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data = 0;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/*
+		 * Leftover bytes: only part of 'data' is meaningful. 'data'
+		 * is zero-initialized above so a partial trailing Tx word
+		 * does not leak stale stack bytes into the FIFO.
+		 */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	union mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		hdr.data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			config = &fifo->vdev[vdev_id]->config.net;
+			/* Drop oversized packets (beyond MTU + L2 header). */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			/* Anything else is treated as console data. */
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			/* Park the current desc and switch to the new vring. */
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.data = 0;
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		/* The wire length excludes the virtio_net_hdr. */
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(hdr.data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Start the payload right after the (possibly empty) virtio header. */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		/* Resume the partially processed packet. */
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		/* The header consumed one FIFO word. */
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * Loops over descriptors while FIFO space (Rx words to drain / Tx words
+ * to fill) remains. Console Tx is special-cased: its data always comes
+ * from the intermediate 'tx_buf', not directly from the vring.
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues.
+ *
+ * @fifo: the TmFifo state
+ * @queue_id: which vring of each vdev to service (Rx or Tx index)
+ * @irq_id: pending-event bit to consume
+ * @is_rx: direction flag passed down to mlxbf_tmfifo_rxtx()
+ *
+ * Does nothing unless the event bit was set and its IRQ is still active.
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	/* Service the matching vring of every registered vdev. */
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/* Bottom-half worker: service Tx then Rx under the FIFO mutex. */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo = container_of(work, struct mlxbf_tmfifo,
+						 work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Push pending data into the TmFifo first. */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Then drain whatever the TmFifo has received. */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Marks the corresponding direction pending and schedules the worker.
+ * Console Tx is flushed inline because the caller may have interrupts
+ * disabled and cannot wait for the worker.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->index & BIT(0))) {
+		/* Rx: worker already scheduled if the bit was set. */
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			return true;
+		}
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use (accept whatever the core negotiated). */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Any in-flight packet is released back to its ring before the
+ * virtqueue itself is deleted.
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			/* Clear the pointer first so no one else uses it. */
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested queue on top of the DMA memory
+ * already allocated in the vring (see mlxbf_tmfifo_alloc_vrings). On any
+ * failure, all queues created so far are torn down.
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device: just clear the status byte. Not much here for now. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = 0;
+}
+
+/*
+ * Read the value of a configuration field.
+ * Out-of-range accesses are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/*
+ * Write the value of a configuration field.
+ * Out-of-range accesses are silently ignored.
+ */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	if (offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Device-core release callback: free the containing tmfifo vdev. */
+static void tmfifo_virtio_dev_release(struct device *device)
+{
+	struct virtio_device *vdev =
+			container_of(device, struct virtio_device, dev);
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	kfree(tm_vdev);
+}
+
+/* Virtio config operations implemented by this transport. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev for the FIFO.
+ *
+ * @dev: parent device
+ * @fifo: TmFifo state
+ * @vdev_id: VIRTIO_ID_* of the device to create
+ * @features: virtio feature bits to advertise
+ * @config: optional initial device config (copied if non-NULL)
+ * @size: size of 'config'
+ *
+ * Returns 0 on success, -EEXIST if the id is taken, -ENOMEM/errno otherwise.
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/*
+	 * Allocate an output buffer for the console device. NOTE(review):
+	 * the allocation result is not checked here; the console Tx path
+	 * tolerates a NULL 'tx_buf.buf' (output is simply dropped) — confirm
+	 * this degradation is intended rather than failing the probe.
+	 */
+	if (vdev_id == VIRTIO_ID_CONSOLE)
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	reg_dev = tm_vdev;
+	if (ret) {
+		dev_err(dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+	/* After registration, the device core owns the lifetime. */
+	if (reg_dev)
+		put_device(&tm_vdev->vdev.dev);
+	else
+		kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Tear down the vdev registered under 'vdev_id', if any. */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (!tm_vdev)
+		goto done;
+
+	/* Unregister from virtio core, then free the ring memory. */
+	unregister_virtio_device(&tm_vdev->vdev);
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+
+done:
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the EFI variable.
+ * Falls back to the built-in default MAC when the variable is absent
+ * or has an unexpected size.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long size = ETH_ALEN;
+	efi_status_t status;
+	u8 buf[ETH_ALEN];
+
+	status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
+				  buf);
+	if (status == EFI_SUCCESS && size == ETH_ALEN)
+		ether_addr_copy(mac, buf);
+	else
+		ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
+}
+
+/* Set the TmFifo thresholds that are used to trigger interrupts. */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	/* LWM at half-full, HWM one below full. */
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	/* Interrupt as soon as a single word arrives (HWM = 1). */
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the TmFifo: stop the timer, IRQs and worker, then delete
+ * every registered vdev. 'is_ready' is cleared first so a racing worker
+ * invocation bails out early.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx register windows, requests the FIFO interrupts, programs
+ * the watermark thresholds, creates the console and network vdevs and
+ * starts the house-keeping timer. Returns 0 on success or a negative errno.
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct device *dev = &pdev->dev;
+	struct mlxbf_tmfifo *fifo;
+	struct resource *res;
+	int i, rc;
+
+	fifo = devm_kzalloc(dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fifo->rx_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fifo->tx_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		/* platform_get_irq() returns a negative errno on failure. */
+		rc = platform_get_irq(pdev, i);
+		if (rc < 0) {
+			dev_err(dev, "platform_get_irq failed\n");
+			return rc;
+		}
+		fifo->irq_info[i].irq = rc;
+		rc = devm_request_irq(dev, fifo->irq_info[i].irq,
+				      mlxbf_tmfifo_irq_handler, 0,
+				      "tmfifo", &fifo->irq_info[i]);
+		if (rc) {
+			dev_err(dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return rc;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (rc)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET,
+				      MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				      sizeof(net_config));
+	if (rc)
+		goto fail;
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return rc;
+}
+
+/* Device remove function: undo everything done by probe. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+
+	return 0;
+}
+
+/* ACPI ID this driver binds to (BlueField TmFifo device). */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+/* Platform driver registration for the TmFifo device. */
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-05 19:10     ` Liming Sun
@ 2019-04-07  2:05       ` Liming Sun
  2019-04-11 14:13         ` Andy Shevchenko
  0 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-04-07  2:05 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy!  I just posted v14, which addresses all the comments you mentioned below for v13.

Regards,
Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Friday, April 5, 2019 11:44 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
>
> On Thu, Apr 4, 2019 at 10:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
>
> Thanks for an update. Almost good.
> My comments below.
>
> Meanwhile I pushed this to my review and testing queue, thanks!
>
> > +#include <linux/acpi.h>
> > +#include <linux/bitfield.h>
> > +#include <linux/circ_buf.h>
> > +#include <linux/efi.h>
> > +#include <linux/irq.h>
> > +#include <linux/module.h>
> > +#include <linux/mutex.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/types.h>
>
> Perhaps blank line here. Would be more clear that this is utilizing
> virtio framework.

Updated in v14.

>
> > +#include <linux/virtio_config.h>
> > +#include <linux/virtio_console.h>
> > +#include <linux/virtio_ids.h>
> > +#include <linux/virtio_net.h>
> > +#include <linux/virtio_ring.h>
>
> > +/**
> > + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> > + * @type: message type
> > + * @len: payload length
> > + * @u: 64-bit union data
> > + */
> > +union mlxbf_tmfifo_msg_hdr {
> > +       struct {
> > +               u8 type;
> > +               __be16 len;
> > +               u8 unused[5];
> > +       } __packed;
> > +       u64 data;
>
> I'm not sure I understand how you can distinguish which field of union to use?
> Isn't here some type missed?

Updated the comment in v14.

This message header is a union of a struct and a u64.
The struct has 'type' and 'len' fields, which are used to
encode and decode the message.
The 'data' field is used to read or write the whole header from/to the FIFO in a single access.

>
> > +};
>
> > +static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
>
> > +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
>
> This should be two lines.

Updated in v14.

>
> > +/* Supported virtio-net features. */
> > +#define MLXBF_TMFIFO_NET_FEATURES      (BIT_ULL(VIRTIO_NET_F_MTU) | \
> > +                                        BIT_ULL(VIRTIO_NET_F_STATUS) | \
> > +                                        BIT_ULL(VIRTIO_NET_F_MAC))
>
> Better to write as
>
> #define FOO \
> (BIT(x) | BIT(y) ...)
>
> I think I told this earlier?

Updated in v14.

>
> > +/* Allocate vrings for the fifo. */
>
> fifo -> FIFO (and check all occurrences)

Updated in v14.

>
> > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       struct device *dev;
> > +       dma_addr_t dma;
> > +       int i, size;
> > +       void *va;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > +               vring = &tm_vdev->vrings[i];
> > +               vring->fifo = fifo;
> > +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> > +               vring->align = SMP_CACHE_BYTES;
> > +               vring->index = i;
> > +               vring->vdev_id = tm_vdev->vdev.id.device;
> > +               dev = &tm_vdev->vdev.dev;
> > +
> > +               size = vring_size(vring->num, vring->align);
> > +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> > +               if (!va) {
>
> > +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
>
> I don't see how this will free the allocated entries.
> I think I told about this either.

Updated in v14.
It's not a memory leak since the caller will release them
in case of failures. I added one line in this function to
call the mlxbf_tmfifo_free_vrings() to be more clear.

>
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               vring->va = va;
> > +               vring->dma = dma;
> > +       }
> > +
> > +       return 0;
> > +}
>
> > +/* House-keeping timer. */
> > +static void mlxbf_tmfifo_timer(struct timer_list *arg)
> > +{
>
> > +       struct mlxbf_tmfifo *fifo = container_of(arg, struct mlxbf_tmfifo,
> > +                                                timer);
>
> One line would be still good enough.

Updated in v14.

>
> > +       int more;
> > +
> > +       more = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events) ||
> > +                   !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> > +
> > +       if (more)
> > +               schedule_work(&fifo->work);
> > +
> > +       mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
> > +}
>
> > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > +                                 buf);
> > +       if (status == EFI_SUCCESS && size == ETH_ALEN)
> > +               ether_addr_copy(mac, buf);
> > +       else
>
> > +               memcpy(mac, mlxbf_tmfifo_net_default_mac, ETH_ALEN);
>
> ether_addr_copy() as well.

Updated in v14.

>
> > +}
>
> > +       fifo->pdev = pdev;
>
> Do you really need to keep pdev there? Isn't struct device pointer enough?

Not needed. Updated in v14. Thanks!

>
>
> > +       /* Create the console vdev. */
> > +       ret = mlxbf_tmfifo_create_vdev(&pdev->dev, fifo, VIRTIO_ID_CONSOLE, 0,
> > +                                      NULL, 0);
>
> If you define temporary variable
>   struct device *dev = &pdev->dev;
> these lines can be merged into one.

Yes, updated in v14.

>
> > +       if (ret)
> > +               goto fail;
>
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v14] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-07  2:03 ` [PATCH v14] " Liming Sun
@ 2019-04-11 14:09   ` Andy Shevchenko
  2019-04-12 14:23     ` Liming Sun
  0 siblings, 1 reply; 179+ messages in thread
From: Andy Shevchenko @ 2019-04-11 14:09 UTC (permalink / raw)
  To: Liming Sun
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

On Sun, Apr 7, 2019 at 5:03 AM Liming Sun <lsun@mellanox.com> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.

Thanks for an update, my comments below.

> +/**
> + * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
> + * @vdev: virtio device, in which the vdev.id.device field has the
> + *        VIRTIO_ID_xxx id to distinguish the virtual device.
> + * @status: status of the device
> + * @features: supported features of the device
> + * @vrings: array of tmfifo vrings of this device
> + * @config.cons: virtual console config -
> + *               select if vdev.id.device is VIRTIO_ID_CONSOLE
> + * @config.net: virtual network config -
> + *              select if vdev.id.device is VIRTIO_ID_NET
> + * @tx_buf: tx buffer used to buffer data before writing into the FIFO
> + */
> +struct mlxbf_tmfifo_vdev {
> +       struct virtio_device vdev;
> +       u8 status;
> +       u64 features;
> +       struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
> +       union {
> +               struct virtio_console_config cons;
> +               struct virtio_net_config net;
> +       } config;
> +       struct circ_buf tx_buf;
> +};

(1)

> +/**
> + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> + * @type: message type
> + * @len: payload length
> + * @data: 64-bit data used to write the message header into the TmFifo register.
> + *
> + * This message header is a union of struct and u64 data. The 'struct' has
> + * type and length field which are used to encode & decode the message. The
> + * 'data' field is used to read/write the message header from/to the FIFO.
> + */
> +union mlxbf_tmfifo_msg_hdr {
> +       struct {
> +               u8 type;
> +               __be16 len;
> +               u8 unused[5];
> +       } __packed;
> +       u64 data;
> +};

This union misses a type. See, for example, above structure (1) where
union is used correctly.

> +/* Allocate vrings for the FIFO. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       struct device *dev;
> +       dma_addr_t dma;
> +       int i, size;
> +       void *va;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +               vring->fifo = fifo;
> +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> +               vring->align = SMP_CACHE_BYTES;
> +               vring->index = i;
> +               vring->vdev_id = tm_vdev->vdev.id.device;
> +               dev = &tm_vdev->vdev.dev;
> +
> +               size = vring_size(vring->num, vring->align);
> +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> +               if (!va) {

> +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> +                       mlxbf_tmfifo_free_vrings(fifo, tm_vdev);

First do things, then report about what has been done.

> +                       return -ENOMEM;
> +               }
> +
> +               vring->va = va;
> +               vring->dma = dma;
> +       }
> +
> +       return 0;
> +}

> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> +{
> +       struct mlxbf_tmfifo_irq_info *irq_info = arg;
> +

> +       if (irq_info->index < MLXBF_TM_MAX_IRQ &&

On which circumstances this is possible?

> +           !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> +               schedule_work(&irq_info->fifo->work);
> +
> +       return IRQ_HANDLED;
> +}

> +static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
> +{
> +       struct vring_desc *desc_head;
> +       u32 len = 0;
> +
> +       if (vring->desc_head) {
> +               desc_head = vring->desc_head;
> +               len = vring->pkt_len;
> +       } else {
> +               desc_head = mlxbf_tmfifo_get_next_desc(vring);

> +               if (desc_head)

Redundant...

> +                       len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);

...this is NULL-aware AFAICS.

> +       }
> +
> +       if (desc_head)
> +               mlxbf_tmfifo_release_desc(vring, desc_head, len);
> +
> +       vring->pkt_len = 0;
> +       vring->desc = NULL;
> +       vring->desc_head = NULL;
> +}

> +/* The notify function is called when new buffers are posted. */
> +static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
> +{
> +       struct mlxbf_tmfifo_vring *vring = vq->priv;
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +       struct mlxbf_tmfifo *fifo;
> +       unsigned long flags;
> +
> +       fifo = vring->fifo;
> +
> +       /*
> +        * Virtio maintains vrings in pairs, even number ring for Rx
> +        * and odd number ring for Tx.
> +        */

> +       if (!(vring->index & BIT(0))) {

Perhaps positive conditional is better.

> +               if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
> +                       return true;
> +       } else {
> +               /*
> +                * Console could make blocking call with interrupts disabled.
> +                * In such case, the vring needs to be served right away. For
> +                * other cases, just set the TX LWM bit to start Tx in the
> +                * worker handler.
> +                */
> +               if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
> +                       spin_lock_irqsave(&fifo->spin_lock, flags);
> +                       tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
> +                       mlxbf_tmfifo_console_output(tm_vdev, vring);
> +                       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +               } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
> +                                           &fifo->pend_events)) {
> +                       return true;
> +               }
> +       }
> +
> +       schedule_work(&fifo->work);
> +
> +       return true;
> +}

> +/* Read the value of a configuration field. */
> +static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
> +                                   unsigned int offset,
> +                                   void *buf,
> +                                   unsigned int len)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +

> +       if (offset + len > sizeof(tm_vdev->config))
> +               return;

This doesn't protect against too big len and offset.
Same for other similar checks.

> +
> +       memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
> +}

> +/* Read the configured network MAC address from efi variable. */
> +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> +{
> +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +       unsigned long size = ETH_ALEN;
> +       efi_status_t status;
> +       u8 buf[ETH_ALEN];
> +

> +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> +                                 buf);

Use one line.

> +       if (status == EFI_SUCCESS && size == ETH_ALEN)
> +               ether_addr_copy(mac, buf);
> +       else
> +               ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
> +}

> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> +{

> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +       fifo->rx_base = devm_ioremap_resource(dev, res);

There is new helper devm_platform_ioremap_resource().
Please, use it instead.

> +       if (IS_ERR(fifo->rx_base))
> +               return PTR_ERR(fifo->rx_base);

> +       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +       fifo->tx_base = devm_ioremap_resource(dev, res);

Ditto.

> +       if (IS_ERR(fifo->tx_base))
> +               return PTR_ERR(fifo->tx_base);

> +}

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* Re: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-07  2:05       ` Liming Sun
@ 2019-04-11 14:13         ` Andy Shevchenko
  2019-04-12 16:15           ` Liming Sun
  0 siblings, 1 reply; 179+ messages in thread
From: Andy Shevchenko @ 2019-04-11 14:13 UTC (permalink / raw)
  To: Liming Sun
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

On Sun, Apr 7, 2019 at 5:05 AM Liming Sun <lsun@mellanox.com> wrote:

> > > + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> > > + * @type: message type
> > > + * @len: payload length
> > > + * @u: 64-bit union data
> > > + */
> > > +union mlxbf_tmfifo_msg_hdr {
> > > +       struct {
> > > +               u8 type;
> > > +               __be16 len;
> > > +               u8 unused[5];
> > > +       } __packed;
> > > +       u64 data;
> >
> > I'm not sure I understand how you can distinguish which field of union to use?
> > Isn't here some type missed?
>
> Updated the comment in v14.
>
> This message header is a union of struct and u64 data.
> The 'struct' has
> type and length field which are used to encode & decode the message.
> The 'data' field is used to read/write the message header from/to the FIFO.

Something fishy here.

You are using a structure of data which you would like to write with
one call? Perhaps you need to construct this on-the-fly.
Moreover, the __be member is used in data which is written as LE.
This needs more explanation.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v14] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-11 14:09   ` Andy Shevchenko
@ 2019-04-12 14:23     ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-04-12 14:23 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy! I'll try to post v15 to address these comments this weekend.
(Please also see responses to each comments below).

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Thursday, April 11, 2019 10:10 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v14] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Sun, Apr 7, 2019 at 5:03 AM Liming Sun <lsun@mellanox.com> wrote:
> >
> > This commit adds the TmFifo platform driver for Mellanox BlueField
> > Soc. TmFifo is a shared FIFO which enables external host machine
> > to exchange data with the SoC via USB or PCIe. The driver is based
> > on virtio framework and has console and network access enabled.
> 
> Thanks for an update, my comments below.

Thanks for the comments!

> 
> > +/**
> > + * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
> > + * @vdev: virtio device, in which the vdev.id.device field has the
> > + *        VIRTIO_ID_xxx id to distinguish the virtual device.
> > + * @status: status of the device
> > + * @features: supported features of the device
> > + * @vrings: array of tmfifo vrings of this device
> > + * @config.cons: virtual console config -
> > + *               select if vdev.id.device is VIRTIO_ID_CONSOLE
> > + * @config.net: virtual network config -
> > + *              select if vdev.id.device is VIRTIO_ID_NET
> > + * @tx_buf: tx buffer used to buffer data before writing into the FIFO
> > + */
> > +struct mlxbf_tmfifo_vdev {
> > +       struct virtio_device vdev;
> > +       u8 status;
> > +       u64 features;
> > +       struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
> > +       union {
> > +               struct virtio_console_config cons;
> > +               struct virtio_net_config net;
> > +       } config;
> > +       struct circ_buf tx_buf;
> > +};
> 
> (1)
> 
> > +/**
> > + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> > + * @type: message type
> > + * @len: payload length
> > + * @data: 64-bit data used to write the message header into the TmFifo register.
> > + *
> > + * This message header is a union of struct and u64 data. The 'struct' has
> > + * type and length field which are used to encode & decode the message. The
> > + * 'data' field is used to read/write the message header from/to the FIFO.
> > + */
> > +union mlxbf_tmfifo_msg_hdr {
> > +       struct {
> > +               u8 type;
> > +               __be16 len;
> > +               u8 unused[5];
> > +       } __packed;
> > +       u64 data;
> > +};
> 
> This union misses a type. See, for example, above structure (1) where
> union is used correctly.

This union seems causing confusion. I'll try to remove the union in v15 
and "construct this on-the-fly" just like you mentioned in another email. 
So instead of " writeq(hdr.data, ...)" we could simply do 
"writeq(*(u64 *)&hdr, ...)", thus no need for a union.

> 
> > +/* Allocate vrings for the FIFO. */
> > +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> > +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring;
> > +       struct device *dev;
> > +       dma_addr_t dma;
> > +       int i, size;
> > +       void *va;
> > +
> > +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> > +               vring = &tm_vdev->vrings[i];
> > +               vring->fifo = fifo;
> > +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> > +               vring->align = SMP_CACHE_BYTES;
> > +               vring->index = i;
> > +               vring->vdev_id = tm_vdev->vdev.id.device;
> > +               dev = &tm_vdev->vdev.dev;
> > +
> > +               size = vring_size(vring->num, vring->align);
> > +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> > +               if (!va) {
> 
> > +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> > +                       mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
> 
> First do things, then report about what has been done.

Will update it in v15.

> 
> > +                       return -ENOMEM;
> > +               }
> > +
> > +               vring->va = va;
> > +               vring->dma = dma;
> > +       }
> > +
> > +       return 0;
> > +}
> 
> > +/* Interrupt handler. */
> > +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> > +{
> > +       struct mlxbf_tmfifo_irq_info *irq_info = arg;
> > +
> 
> > +       if (irq_info->index < MLXBF_TM_MAX_IRQ &&
> 
> On which circumstances this is possible?

Yes, not needed at all. Will update it in v15.

> 
> > +           !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> > +               schedule_work(&irq_info->fifo->work);
> > +
> > +       return IRQ_HANDLED;
> > +}
> 
> > +static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
> > +{
> > +       struct vring_desc *desc_head;
> > +       u32 len = 0;
> > +
> > +       if (vring->desc_head) {
> > +               desc_head = vring->desc_head;
> > +               len = vring->pkt_len;
> > +       } else {
> > +               desc_head = mlxbf_tmfifo_get_next_desc(vring);
> 
> > +               if (desc_head)
> 
> Redundant...

Will update it in v15.

> 
> > +                       len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
> 
> ...this is NULL-aware AFAICS.

Yes, it is.

> 
> > +       }
> > +
> > +       if (desc_head)
> > +               mlxbf_tmfifo_release_desc(vring, desc_head, len);
> > +
> > +       vring->pkt_len = 0;
> > +       vring->desc = NULL;
> > +       vring->desc_head = NULL;
> > +}
> 
> > +/* The notify function is called when new buffers are posted. */
> > +static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
> > +{
> > +       struct mlxbf_tmfifo_vring *vring = vq->priv;
> > +       struct mlxbf_tmfifo_vdev *tm_vdev;
> > +       struct mlxbf_tmfifo *fifo;
> > +       unsigned long flags;
> > +
> > +       fifo = vring->fifo;
> > +
> > +       /*
> > +        * Virtio maintains vrings in pairs, even number ring for Rx
> > +        * and odd number ring for Tx.
> > +        */
> 
> > +       if (!(vring->index & BIT(0))) {
> 
> Perhaps positive conditional is better.

Will update it in v15.

> 
> > +               if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
> > +                       return true;
> > +       } else {
> > +               /*
> > +                * Console could make blocking call with interrupts disabled.
> > +                * In such case, the vring needs to be served right away. For
> > +                * other cases, just set the TX LWM bit to start Tx in the
> > +                * worker handler.
> > +                */
> > +               if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
> > +                       spin_lock_irqsave(&fifo->spin_lock, flags);
> > +                       tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
> > +                       mlxbf_tmfifo_console_output(tm_vdev, vring);
> > +                       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> > +               } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
> > +                                           &fifo->pend_events)) {
> > +                       return true;
> > +               }
> > +       }
> > +
> > +       schedule_work(&fifo->work);
> > +
> > +       return true;
> > +}
> 
> > +/* Read the value of a configuration field. */
> > +static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
> > +                                   unsigned int offset,
> > +                                   void *buf,
> > +                                   unsigned int len)
> > +{
> > +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> > +
> 
> > +       if (offset + len > sizeof(tm_vdev->config))
> > +               return;
> 
> This doesn't protect against too big len and offset.
> Same for other similar checks.

Will revise it in v15 like "if ((u64)offset + len > sizeof(tm_vdev->config))"

> 
> > +
> > +       memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
> > +}
> 
> > +/* Read the configured network MAC address from efi variable. */
> > +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> > +{
> > +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> > +       unsigned long size = ETH_ALEN;
> > +       efi_status_t status;
> > +       u8 buf[ETH_ALEN];
> > +
> 
> > +       status = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size,
> > +                                 buf);
> 
> Use one line.

Will update it in v15.

> 
> > +       if (status == EFI_SUCCESS && size == ETH_ALEN)
> > +               ether_addr_copy(mac, buf);
> > +       else
> > +               ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
> > +}
> 
> > +/* Probe the TMFIFO. */
> > +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> > +{
> 
> > +       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +       fifo->rx_base = devm_ioremap_resource(dev, res);
> 
> There is new helper devm_platform_ioremap_resource().
> Please, use it instead.

Will update it in v15.

> 
> > +       if (IS_ERR(fifo->rx_base))
> > +               return PTR_ERR(fifo->rx_base);
> 
> > +       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +       fifo->tx_base = devm_ioremap_resource(dev, res);
> 
> Ditto.

Will update it in v15.

> 
> > +       if (IS_ERR(fifo->tx_base))
> > +               return PTR_ERR(fifo->tx_base);
> 
> > +}
> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* RE: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-04-11 14:13         ` Andy Shevchenko
@ 2019-04-12 16:15           ` Liming Sun
  0 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-04-12 16:15 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

Thanks Andy. Please see my response below for this email as well.

- Liming

> -----Original Message-----
> From: Andy Shevchenko <andy.shevchenko@gmail.com>
> Sent: Thursday, April 11, 2019 10:13 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: David Woods <dwoods@mellanox.com>; Andy Shevchenko <andy@infradead.org>; Darren Hart <dvhart@infradead.org>; Vadim
> Pasternak <vadimp@mellanox.com>; Linux Kernel Mailing List <linux-kernel@vger.kernel.org>; Platform Driver <platform-driver-
> x86@vger.kernel.org>
> Subject: Re: [PATCH v13] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
> 
> On Sun, Apr 7, 2019 at 5:05 AM Liming Sun <lsun@mellanox.com> wrote:
> 
> > > > + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> > > > + * @type: message type
> > > > + * @len: payload length
> > > > + * @u: 64-bit union data
> > > > + */
> > > > +union mlxbf_tmfifo_msg_hdr {
> > > > +       struct {
> > > > +               u8 type;
> > > > +               __be16 len;
> > > > +               u8 unused[5];
> > > > +       } __packed;
> > > > +       u64 data;
> > >
> > > I'm not sure I understand how you can distinguish which field of union to use?
> > > Isn't here some type missed?
> >
> > Updated the comment in v14.
> >
> > This message header is a union of struct and u64 data.
> > The 'struct' has
> > type and length field which are used to encode & decode the message.
> > The 'data' field is used to read/write the message header from/to the FIFO.
> 
> Something fishy here.
> 
> You are using a structure of data which you would like to write with
> one call? Perhaps you need to construct this on-the-fly.

Looks like "union causes confusion".
I will update it in v15 to construct it on-the-fly as suggested.

> Moreover, the __be member is used in data which is written as LE.
> This needs more explanation.

Will update the comment for it in v15.  Below is some explanation of it.

The 'LE' is for the low-level MMIO transport layer. The SoC sends a data
stream into the FIFO, and the other side reads it. The byte order of the data
stream stays the same when the other side reads it. The "__be16" is for the
driver or application on both sides to agree on how to decode the 'length'.

For example, the SoC side (little endian) sends a message with
8-byte message header "01 02 03 04 05 06 07 08" into the FIFO. The other
side (assuming big endian host machine using USB bulk transfer) reads the
same byte-stream and try to decode it with the mlxbf_tmfifo_msg_hdr.
Without the "__be16" conversion, the SoC side will think 
"type=1, length=0x0302" while the big endian host-side will think 
"type=1, length=0x0203".

> 
> --
> With Best Regards,
> Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

* [PATCH v15] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (51 preceding siblings ...)
  (?)
@ 2019-04-12 17:30 ` Liming Sun
  -1 siblings, 0 replies; 179+ messages in thread
From: Liming Sun @ 2019-04-12 17:30 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v14->v15:
    Fixes for comments from Andy:
    - Remove the 'union' definition of mlxbf_tmfifo_msg_hdr and use
      on-the-fly conversion when sending the 8-byte message header
      into the FIFO;
    - Update comment of mlxbf_tmfifo_msg_hdr explaining why '__be16'
      is needed for the 'len' field. The SoC sends data stream into
      the FIFO and the other side reads it. The byte order of the data
      stream (byte-stream) stays the same. The 'len' field is encoded
      into network byte order so external host machine with different
      endianness could decode it. The implementation has been verified
      over USB with an external PPC host machine running in big-endian
      mode.
    - Move the 'dev_err()' line to the end of the block in function
      mlxbf_tmfifo_alloc_vrings();
    - Remove the 'irq_info->index < MLXBF_TM_MAX_IRQ' check in
      mlxbf_tmfifo_irq_handler() since it's unnecessary;
    - Remove the 'if (desc_head)' check in
      mlxbf_tmfifo_release_pending_pkt() since function
      mlxbf_tmfifo_get_pkt_len() is already NULL-aware;
    - Adjust the testing order of 'if (!(vring->index & BIT(0)))'
      in bool mlxbf_tmfifo_virtio_notify() to test the positive case
      'if (vring->index & BIT(0))' first;
    - Add '(u64)offset' conversion in mlxbf_tmfifo_virtio_get() to
      avoid 32-bit length addition overflow;
    - Update the 'efi.get_variable' statement into single line in
      mlxbf_tmfifo_get_cfg_mac();
    - Use new helper devm_platform_ioremap_resource() to replace
      'platform_get_resource() + devm_ioremap_resource()' in
      mlxbf_tmfifo_probe();
v13->v14:
    Fixes for comments from Andy:
    - Add a blank line to separate the virtio header files;
    - Update the comment for 'union mlxbf_tmfifo_msg_hdr' to be
      more clear how this union is used;
    - Update the 'mlxbf_tmfifo_net_default_mac[ETH_ALEN]' definition
      to be two lines;
    - Reformat macro MLXBF_TMFIFO_NET_FEATURES to put the definition
      in a seperate line;
    - Update all 'fifo' to 'FIFO' in the comments;
    - Update mlxbf_tmfifo_alloc_vrings() to specifically release the
      allocated entries in case of failures, so the logic looks more
      clear. In the caller function the mlxbf_tmfifo_free_vrings()
      might be called again in case of other failures, which is ok
      since the 'va' pointer will be set to NULL once released;
    - Update mlxbf_tmfifo_timer() to change the first statement to
      one line;
    - Update one memcpy() to ether_addr_copy() in
      mlxbf_tmfifo_get_cfg_mac();
    - Remove 'fifo->pdev' since it is really not needed;
    - Define temporary variable to update the mlxbf_tmfifo_create_vdev()
      statement into single line.
    New changes by Liming:
    - Reorder the logic a little bit in mlxbf_tmfifo_timer(). Previously
      it has logic like "!a || !b" while the '!b' will not be evaluated
      if '!a' is true. It was changed to this way during review, but is
      actually not the desired behavior since both bits need to be
      tested/set in fifo->pend_events. This issue was found during
      verification which caused extra delays for Tx packets.
v12->v13:
    Rebase and resubmit (no new changes).
v11->v12:
    Fixed the two unsolved comments from v11.
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Done. Seems not hard.
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      Yes, understand the comment now. The tmfifo is fixed, but the
      vdev is dynamic. Use kzalloc() instead, and free the device
      in the release callback which is the right place for it.
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line with 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/23*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change data type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don’t use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1281 +++++++++++++++++++++++++
 4 files changed, 1356 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+	  platform driver support for the TmFifo which supports console
+	  and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..9a5c9fd
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1281 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet (NULL when no packet
+ *        is in progress on this ring)
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/* Interrupt types. Each value doubles as a bit index into 'pend_events'. */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx); also the indices into mlxbf_tmfifo_vdev->vrings[]. */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ *          (only the console Tx path uses this buffer)
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array; also the bit position set in
+ *         fifo->pend_events by the interrupt handler
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx FIFO
+ * @tx_base: mapped register base address for the Tx FIFO
+ * @rx_fifo_size: number of entries of the Rx FIFO
+ * @tx_fifo_size: number of entries of the Tx FIFO
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: vring that owns the packet currently in flight through the
+ *         FIFO, indexed by direction ([0] Tx, [1] Rx); NULL when idle
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
+ * @type: message type (the VIRTIO_ID_xxx of the service the payload
+ *        belongs to)
+ * @len: payload length in network byte order. Messages sent into the FIFO
+ *       will be read by the other side as data stream in the same byte order.
+ *       The length needs to be encoded into network order so both sides
+ *       could understand it.
+ * @unused: padding so the header occupies exactly one 8-byte FIFO word
+ */
+struct mlxbf_tmfifo_msg_hdr {
+	u8 type;
+	__be16 len;
+	u8 unused[5];
+} __packed __aligned(sizeof(u64));
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01
+};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES \
+	(BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_STATUS) | \
+	 BIT_ULL(VIRTIO_NET_F_MAC))
+
+#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
+
+/*
+ * Free vrings of the FIFO device.
+ *
+ * The virtqueue must be deleted before the ring memory it wraps is
+ * released (the original order freed the DMA buffer first, leaving
+ * vring_del_virtqueue() to touch freed memory). The vq is also handled
+ * independently of 'va' so it is not leaked when the ring allocation
+ * was only partially completed.
+ */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Delete the virtqueue first while its ring is still valid. */
+		if (vring->vq) {
+			vring_del_virtqueue(vring->vq);
+			vring->vq = NULL;
+		}
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+		}
+	}
+}
+
+/* Allocate vrings for the FIFO. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			/* Roll back rings allocated in earlier iterations. */
+			mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Disable interrupts of the FIFO device. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		/* Clear 'irq' so the work handler stops serving this event. */
+		fifo->irq_info[i].irq = 0;
+		disable_irq(irq);
+	}
+}
+
+/*
+ * Interrupt handler.
+ *
+ * All real work is deferred to the workqueue; the pend_events bit both
+ * records the event and prevents scheduling the work redundantly.
+ */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	if (!test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Get the next packet descriptor from the vring.
+ *
+ * Returns the head descriptor of the next available chain, or NULL when
+ * the driver has caught up with the avail index (no buffer posted).
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/* Release virtio descriptor chain 'desc' into the used ring with 'len'. */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/*
+ * Get the total length of the descriptor chain, following the
+ * VRING_DESC_F_NEXT links from 'desc' to the end of the chain.
+ */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Complete the pending packet (if any) and return it to the used ring.
+ * If no packet is pending, consume the next avail descriptor chain and
+ * release it with its computed length instead.
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/*
+ * Zero-initialize the virtio_net header at the head of an Rx buffer.
+ * NOTE(review): 'is_rx' is currently unused in this function — confirm
+ * whether it can be dropped or is reserved for a later change.
+ */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct virtio_net_hdr *net_hdr;
+
+	net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/*
+ * Get and initialize the next packet, recording it as the vring's
+ * pending packet (desc/desc_head).
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically kicks the Rx/Tx work in case an event was missed, then
+ * re-arms itself for the next interval.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *t)
+{
+	struct mlxbf_tmfifo *fifo = container_of(t, struct mlxbf_tmfifo, timer);
+	int rx, tx;
+
+	rx = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+	tx = !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (rx || tx)
+		schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet into the circular output buffer, handling the
+ * wrap-around at the buffer end. The caller has already verified there
+ * is enough free space for the whole packet.
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			/* Split the copy across the buffer wrap point. */
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * A packet that does not fit into the remaining buffer space is released
+ * back to the used ring without being copied, i.e. its data is dropped.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Get the number of available words in Rx FIFO for receiving. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 sts;
+
+	/* The COUNT field of RX_STS holds the words buffered in the FIFO. */
+	sts = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);
+}
+
+/* Get the number of available words in the TmFifo for sending. */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	int tx_reserve;
+	u32 count;
+	u64 sts;
+
+	/* Reserve some room in FIFO for console messages. */
+	if (vdev_id == VIRTIO_ID_NET)
+		tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+	else
+		tx_reserve = 1;
+
+	sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+	/* May be <= 0 when the FIFO is fuller than the reserve allows. */
+	return fifo->tx_fifo_size - tx_reserve - count;
+}
+
+/* Console Tx (move data from the output buffer into the TmFifo). */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	struct mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/*
+	 * NOTE(review): 'size' is read before taking spin_lock. The producer
+	 * (mlxbf_tmfifo_console_output) advances 'head' under the same lock,
+	 * so this snapshot can only under-estimate the buffered data —
+	 * confirm that is the intent.
+	 */
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. */
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf'. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			/* Reassemble one word across the buffer wrap point. */
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/* Rx/Tx one word in the descriptor buffer. */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/*
+	 * Read a word from FIFO for Rx. For Tx, zero-initialize the word so
+	 * the partial-copy path below cannot leak stale kernel stack bytes
+	 * into the FIFO.
+	 */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+	else
+		data = 0;
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/* Leftover bytes. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	struct mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		*(u64 *)&hdr = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			/*
+			 * The network vdev might not be created (yet); don't
+			 * dereference its config in that case.
+			 */
+			if (!fifo->vdev[vdev_id])
+				return;
+			config = &fifo->vdev[vdev_id]->config.net;
+			/* Drop oversized packets. */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Return true to indicate more data available.
+ * '*avail' is decremented by the number of FIFO words consumed/produced.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		/* Packet complete: release FIFO ownership for this direction. */
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * fifo->vring[is_rx] records which vring owns a partially transferred
+ * packet; another vring must not interleave words into the FIFO until
+ * that packet completes.
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues for all vdevs.
+ *
+ * Runs only if the corresponding event bit is pending and the IRQ is
+ * still active ('irq' is cleared to 0 by mlxbf_tmfifo_disable_irqs).
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/* Work handler: drains Tx first, then services Rx, under fifo->lock. */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Rx (Receive data from the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ * Always returns true (the kick is never suppressed).
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (vring->index & BIT(0)) {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			/* Event already pending; work is scheduled or running. */
+			return true;
+		}
+	} else {
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Get the array of feature bits for this device (set at vdev creation). */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use; records the negotiated feature set. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->features = vdev->features;
+
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ * Any packet still pending on a vring is released to the used ring first.
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ * The ring memory itself was pre-allocated in mlxbf_tmfifo_alloc_vrings();
+ * here it is zeroed and wrapped into a virtqueue.
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte (software state only; no hardware side effect). */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now; only clears the status byte. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	/* Out-of-range access is silently ignored (no error path here). */
+	if ((u64)offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Virtio config op: write 'len' bytes at 'offset' of the config space. */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	/* The u64 cast avoids 32-bit overflow of 'offset + len'.
+	 * Out-of-range writes are silently ignored.
+	 */
+	if ((u64)offset + len > sizeof(tm_vdev->config))
+		return;
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/*
+ * Device release callback: frees the vdev allocated with kzalloc() in
+ * mlxbf_tmfifo_create_vdev() once its last reference is dropped.
+ */
+static void tmfifo_virtio_dev_release(struct device *device)
+{
+	struct virtio_device *vdev =
+			container_of(device, struct virtio_device, dev);
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	kfree(tm_vdev);
+}
+
+/* Virtio config operations implemented by this driver. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create and register a virtio device for the FIFO.
+ *
+ * 'config'/'size' optionally provide the initial device config space.
+ * Returns 0 on success or a negative errno.
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	/* Freed by tmfifo_virtio_dev_release() (or kfree below on error). */
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/* Allocate an output buffer for the console device. */
+	if (vdev_id == VIRTIO_ID_CONSOLE)
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+	/* NOTE(review): tx_buf.buf is not checked for NULL here; confirm the
+	 * console Tx path tolerates a NULL buffer, or fail with -ENOMEM.
+	 */
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	reg_dev = tm_vdev;
+	if (ret) {
+		dev_err(dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+	/* Once registered, the memory is owned by the device core. */
+	if (reg_dev)
+		put_device(&tm_vdev->vdev.dev);
+	else
+		kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Unregister and delete a vdev of the FIFO; no-op if never created. */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from the EFI variable,
+ * falling back to the built-in default if the variable is absent
+ * or malformed.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long size = ETH_ALEN;
+	u8 buf[ETH_ALEN];
+	efi_status_t rc;
+
+	rc = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size, buf);
+	if (rc == EFI_SUCCESS && size == ETH_ALEN)
+		ether_addr_copy(mac, buf);
+	else
+		ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
+}
+
+/* Set TmFifo thresholds which are used to trigger interrupts. */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the FIFO: mark it not ready, then stop the timer, IRQs and
+ * deferred work before deleting all virtual devices.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/* Probe the TMFIFO: map registers, request IRQs, create the vdevs. */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct device *dev = &pdev->dev;
+	struct mlxbf_tmfifo *fifo;
+	int i, rc;
+
+	fifo = devm_kzalloc(dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	fifo->rx_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	fifo->tx_base = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	/* Request all FIFO interrupts (Rx/Tx low/high watermark). */
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+		/* NOTE(review): platform_get_irq() can return a negative
+		 * errno which is passed unchecked to devm_request_irq().
+		 */
+		rc = devm_request_irq(dev, fifo->irq_info[i].irq,
+				      mlxbf_tmfifo_irq_handler, 0,
+				      "tmfifo", &fifo->irq_info[i]);
+		if (rc) {
+			dev_err(dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return rc;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (rc)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET,
+				      MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				      sizeof(net_config));
+	if (rc)
+		goto fail;
+
+	/* Start the house-keeping timer that drives deferred Rx/Tx. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return rc;
+}
+
+/* Device remove function: undo everything done in probe. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+	mlxbf_tmfifo_cleanup(fifo);
+
+	return 0;
+}
+
+/* ACPI match table; "MLNXBF01" is the TmFifo ACPI HID. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+/* Platform driver; devices are matched via the ACPI HID "MLNXBF01". */
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* [PATCH v16] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 ` Liming Sun
                   ` (52 preceding siblings ...)
  (?)
@ 2019-05-03 13:49 ` Liming Sun
  2019-05-06  9:13   ` Andy Shevchenko
  -1 siblings, 1 reply; 179+ messages in thread
From: Liming Sun @ 2019-05-03 13:49 UTC (permalink / raw)
  To: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak
  Cc: Liming Sun, linux-kernel, platform-driver-x86

This commit adds the TmFifo platform driver for Mellanox BlueField
Soc. TmFifo is a shared FIFO which enables external host machine
to exchange data with the SoC via USB or PCIe. The driver is based
on virtio framework and has console and network access enabled.

Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
v15->v16:
    Rebase and resubmit (no new changes).
v14->v15:
    Fixes for comments from Andy:
    - Remove the 'union' definition of mlxbf_tmfifo_msg_hdr and use
      on-the-fly conversion when sending the 8-byte message header
      into the FIFO;
    - Update comment of mlxbf_tmfifo_msg_hdr explaining why '__be16'
      is needed for the 'len' field. The SoC sends data stream into
      the FIFO and the other side reads it. The byte order of the data
      stream (byte-stream) stays the same. The 'len' field is encoded
      into network byte order so upper-level applications in external
      host machine with different endianness could decode it. This
      implementation was verified over USB with an external PPC host
      machine running in big-endian mode.
    - Move the 'dev_err()' line to the end of the block in function
      mlxbf_tmfifo_alloc_vrings();
    - Remove the 'irq_info->index < MLXBF_TM_MAX_IRQ' check in
      mlxbf_tmfifo_irq_handler() since it's unnecessary;
    - Remove the 'if (desc_head)' check in
      mlxbf_tmfifo_release_pending_pkt() since function
      mlxbf_tmfifo_get_pkt_len() is already NULL-aware;
    - Adjust the testing order of 'if (!(vring->index & BIT(0)))'
      in bool mlxbf_tmfifo_virtio_notify() to test the positive case
      'if (vring->index & BIT(0))' first;
    - Add '(u64)offset' conversion in mlxbf_tmfifo_virtio_get() to
      avoid 32-bit length addition overflow;
    - Update the 'efi.get_variable' statement into single line in
      mlxbf_tmfifo_get_cfg_mac();
    - Use new helper devm_platform_ioremap_resource() to replace
      'platform_get_resource() + devm_ioremap_resource()' in
      mlxbf_tmfifo_probe();
v13->v14:
    Fixes for comments from Andy:
    - Add a blank line to separate the virtio header files;
    - Update the comment for 'union mlxbf_tmfifo_msg_hdr' to be
      more clear how this union is used;
    - Update the 'mlxbf_tmfifo_net_default_mac[ETH_ALEN]' definition
      to be two lines;
    - Reformat macro MLXBF_TMFIFO_NET_FEATURES to put the definition
      in a separate line;
    - Update all 'fifo' to 'FIFO' in the comments;
    - Update mlxbf_tmfifo_alloc_vrings() to specifically release the
      allocated entries in case of failures, so the logic looks more
      clear. In the caller function the mlxbf_tmfifo_free_vrings()
      might be called again in case of other failures, which is ok
      since the 'va' pointer will be set to NULL once released;
    - Update mlxbf_tmfifo_timer() to change the first statement to
      one line;
    - Update one memcpy() to ether_addr_copy() in
      mlxbf_tmfifo_get_cfg_mac();
    - Remove 'fifo->pdev' since it is really not needed;
    - Define temporary variable to update the mlxbf_tmfifo_create_vdev()
      statement into single line.
    New changes by Liming:
    - Reorder the logic a little bit in mlxbf_tmfifo_timer(). Previously
      it has logic like "!a || !b" while the '!b' will not be evaluated
      if '!a' is true. It was changed to this way during review, but is
      actually not the desired behavior since both bits need to be
      tested/set in fifo->pend_events. This issue was found during
      verification which caused extra delays for Tx packets.
v12->v13:
    Rebase and resubmit (no new changes).
v11->v12:
    Fixed the two unsolved comments from v11.
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Done. Seems not hard.
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      Yes, understand the comment now. The tmfifo is fixed, but the
      vdev is dynamic. Use kzalloc() instead, and free the device
      in the release callback which is the right place for it.
v10->v11:
    Fixes for comments from Andy:
    - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
    - Removed the cpu_to_le64()/le64_to_cpu() conversion since
      readq()/writeq() already takes care of it.
    - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
    - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
    - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
      value 6.
    - Change the tx_buf to use Linux existing 'struct circ_buf'.
    Comment not applied:
    - "Change macro mlxbf_vdev_to_tmfifo() to one line"
      Couldn't fit in one line with 80 characters
    - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
      This is SoC, the device won't be closed or detached.
      The only case is when the driver is unloaded. So it appears
      ok to use devm_kzalloc() since it's allocated during probe()
      and released during module unload.
    Comments from Vadim: OK
v9->v10:
    Fixes for comments from Andy:
    - Use devm_ioremap_resource() instead of devm_ioremap().
    - Use kernel-doc comments.
    - Keep Makefile contents sorted.
    - Use same fixed format for offsets.
    - Use SZ_1K/SZ_32K instead of 1024/23*1024.
    - Remove unnecessary comments.
    - Use one style for max numbers.
    - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
    - Use globally defined MTU instead of new definition.
    - Remove forward declaration of mlxbf_tmfifo_remove().
    - Remove PAGE_ALIGN() for dma_alloc_coherent)().
    - Remove the cast of "struct vring *".
    - Check return result of test_and_set_bit().
    - Add a macro mlxbt_vdev_to_tmfifo().
    - Several other minor coding style comments.
    Comment not applied:
    - "Shouldn't be rather helper in EFI lib in kernel"
      Looks like efi.get_variable() is the way I found in the kernel
      tree.
    - "this one is not protected anyhow? Potential race condition"
      In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
      'tx_buf' only, not the FIFO writes. So there is no race condition.
    - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
      Yes, it is needed to make sure the structure is 8 bytes.
    Fixes for comments from Vadim:
    - Use tab in mlxbf-tmfifo-regs.h
    - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
      mlxbf_tmfifo_irq_info as well.
    - Use _MAX instead of _CNT in the macro definition to be consistent.
    - Fix the MODULE_LICENSE.
    - Use BIT_ULL() instead of BIT().
    - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
      mlxbf_tmfifo_rxtx_word()
    - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
      WARN_ON().
    - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
      in mlxbf_tmfifo_rxtx_word().
    - Change data type of vring_change from 'int' to 'bool'.
    - Remove the blank lines after Signed-off.
    - Don’t use declaration in the middle.
    - Make the network header initialization in some more elegant way.
    - Change label done to mlxbf_tmfifo_desc_done.
    - Remove some unnecessary comments, and several other misc coding
      style comments.
    - Simplify code logic in mlxbf_tmfifo_virtio_notify()
    New changes by Liming:
    - Simplify the Rx/Tx function arguments to make it more readable.
v8->v9:
    Fixes for comments from Andy:
    - Use modern devm_xxx() API instead.
    Fixes for comments from Vadim:
    - Split the Rx/Tx function into smaller functions.
    - File name, copyright information.
    - Function and variable name conversion.
    - Local variable and indent coding styles.
    - Remove unnecessary 'inline' declarations.
    - Use devm_xxx() APIs.
    - Move the efi_char16_t MAC address definition to global.
    - Fix warnings reported by 'checkpatch --strict'.
    - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
    - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
    - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
      mlxbf_tmfifo_vdev_tx_buf_pop().
    - Add union to avoid casting between __le64 and u64.
    - Several other misc coding style comments.
    New changes by Liming:
    - Removed the DT binding documentation since only ACPI is
      supported for now by UEFI on the SoC.
v8: Re-submit under drivers/platform/mellanox for the target-side
    platform driver only.
v7: Added host side drivers into the same patch set.
v5~v6: Coding style fix.
v1~v4: Initial version for directory drivers/soc/mellanox.
---
 drivers/platform/mellanox/Kconfig             |   12 +-
 drivers/platform/mellanox/Makefile            |    1 +
 drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
 drivers/platform/mellanox/mlxbf-tmfifo.c      | 1281 +++++++++++++++++++++++++
 4 files changed, 1356 insertions(+), 1 deletion(-)
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
 create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c

diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
index cd8a908..530fe7e 100644
--- a/drivers/platform/mellanox/Kconfig
+++ b/drivers/platform/mellanox/Kconfig
@@ -5,7 +5,7 @@
 
 menuconfig MELLANOX_PLATFORM
 	bool "Platform support for Mellanox hardware"
-	depends on X86 || ARM || COMPILE_TEST
+	depends on X86 || ARM || ARM64 || COMPILE_TEST
 	---help---
 	  Say Y here to get to see options for platform support for
 	  Mellanox systems. This option alone does not add any kernel code.
@@ -34,4 +34,14 @@ config MLXREG_IO
 	  to system resets operation, system reset causes monitoring and some
 	  kinds of mux selection.
 
+config MLXBF_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo platform driver"
+	depends on ARM64
+	depends on ACPI
+	depends on VIRTIO_CONSOLE && VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides
+	  platform driver support for the TmFifo which supports console
+	  and networking based on the virtio framework.
+
 endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
index 57074d9c..a229bda1 100644
--- a/drivers/platform/mellanox/Makefile
+++ b/drivers/platform/mellanox/Makefile
@@ -3,5 +3,6 @@
 # Makefile for linux/drivers/platform/mellanox
 # Mellanox Platform-Specific Drivers
 #
+obj-$(CONFIG_MLXBF_TMFIFO)	+= mlxbf-tmfifo.o
 obj-$(CONFIG_MLXREG_HOTPLUG)	+= mlxreg-hotplug.o
 obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 0000000..e4f0d2e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+/* Tx FIFO: data/status/control register offsets and field masks. */
+#define MLXBF_TMFIFO_TX_DATA				0x00
+#define MLXBF_TMFIFO_TX_STS				0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL				0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+/* Rx FIFO: data/status/control register offsets and field masks. */
+#define MLXBF_TMFIFO_RX_DATA				0x00
+#define MLXBF_TMFIFO_RX_STS				0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH		9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL		0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK			GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL				0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH			0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT			0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH			8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL		128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK			GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK			GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT		32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH		9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL	256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK		GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK		GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 0000000..9a5c9fd
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1281 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE			SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE		SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE	8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL		(HZ / 10)
+
+/*
+ * Virtual devices sharing the TM FIFO. Sized so the vdev array can be
+ * indexed directly by VIRTIO_ID_xxx (console has the largest id used).
+ */
+#define MLXBF_TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO		16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS		2
+
+struct mlxbf_tmfifo;
+
+/**
+ * struct mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+	void *va;
+	dma_addr_t dma;
+	struct virtqueue *vq;
+	struct vring_desc *desc;
+	struct vring_desc *desc_head;
+	int cur_len;
+	int rem_len;
+	u32 pkt_len;
+	u16 next_avail;
+	int num;
+	int align;
+	int index;
+	int vdev_id;
+	struct mlxbf_tmfifo *fifo;
+};
+
+/*
+ * Interrupt types. Also used as indices into the irq_info array and as
+ * bit positions in the pend_events bitmap.
+ */
+enum {
+	MLXBF_TM_RX_LWM_IRQ,
+	MLXBF_TM_RX_HWM_IRQ,
+	MLXBF_TM_TX_LWM_IRQ,
+	MLXBF_TM_TX_HWM_IRQ,
+	MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx): indices into a vdev's vrings[] array. */
+enum {
+	MLXBF_TMFIFO_VRING_RX,
+	MLXBF_TMFIFO_VRING_TX,
+	MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * struct mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ *        VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ *               select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ *              select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+	struct virtio_device vdev;
+	u8 status;
+	u64 features;
+	struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+	union {
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct circ_buf tx_buf;
+};
+
+/**
+ * struct mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array
+ */
+struct mlxbf_tmfifo_irq_info {
+	struct mlxbf_tmfifo *fifo;
+	int irq;
+	int index;
+};
+
+/**
+ * struct mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx FIFO
+ * @tx_base: mapped register base address for the Tx FIFO
+ * @rx_fifo_size: number of entries of the Rx FIFO
+ * @tx_fifo_size: number of entries of the Tx FIFO
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: Tx/Rx ring
+ * @spin_lock: spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+	struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+	struct mutex lock;		/* TmFifo lock */
+	void __iomem *rx_base;
+	void __iomem *tx_base;
+	int rx_fifo_size;
+	int tx_fifo_size;
+	unsigned long pend_events;
+	struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+	struct work_struct work;
+	struct timer_list timer;
+	/* NOTE(review): 2 == MLXBF_TMFIFO_VRING_MAX; the macro would be
+	 * clearer here.
+	 */
+	struct mlxbf_tmfifo_vring *vring[2];
+	spinlock_t spin_lock;		/* spin lock */
+	bool is_ready;
+};
+
+/**
+ * struct mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
+ * @type: message type
+ * @len: payload length in network byte order. Messages sent into the FIFO
+ *       will be read by the other side as data stream in the same byte order.
+ *       The length needs to be encoded into network order so both sides
+ *       could understand it.
+ */
+struct mlxbf_tmfifo_msg_hdr {
+	u8 type;
+	__be16 len;
+	u8 unused[5];
+} __packed __aligned(sizeof(u64));	/* exactly 8 bytes: one FIFO word */
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+	0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01
+};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD	36
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES \
+	(BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_STATUS) | \
+	 BIT_ULL(VIRTIO_NET_F_MAC))
+
+/* Map a virtio_device pointer back to its containing tmfifo vdev. */
+#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
+
+/* Free vrings (and their virtqueues) of the FIFO device. */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	int i, size;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		/* 'va' is NULL if this ring was never (or already) freed. */
+		if (vring->va) {
+			size = vring_size(vring->num, vring->align);
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Allocate vrings for the FIFO; frees any partial allocation on error. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+				     struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+	struct mlxbf_tmfifo_vring *vring;
+	struct device *dev;
+	dma_addr_t dma;
+	int i, size;
+	void *va;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->num = MLXBF_TMFIFO_VRING_SIZE;
+		vring->align = SMP_CACHE_BYTES;
+		vring->index = i;
+		vring->vdev_id = tm_vdev->vdev.id.device;
+		dev = &tm_vdev->vdev.dev;
+
+		size = vring_size(vring->num, vring->align);
+		va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+		if (!va) {
+			/* Release the rings allocated so far. */
+			mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+			dev_err(dev->parent, "dma_alloc_coherent failed\n");
+			return -ENOMEM;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Disable (and forget) all interrupts of the FIFO device. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		irq = fifo->irq_info[i].irq;
+		fifo->irq_info[i].irq = 0;
+		disable_irq(irq);
+	}
+}
+
+/* Interrupt handler: defer the real work to the workqueue. */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+	struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+	/* Only schedule when this event bit was not already pending. */
+	if (!test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Get the next packet descriptor from the vring's avail ring. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	unsigned int idx, head;
+
+	/* No new avail entries: nothing to consume. */
+	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+	if (WARN_ON(head >= vr->num))
+		return NULL;
+
+	/* Claim the entry only after a valid head was read. */
+	vring->next_avail++;
+
+	return &vr->desc[head];
+}
+
+/* Release a virtio descriptor back into the vring's used ring. */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+				      struct vring_desc *desc, u32 len)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u16 idx, vr_idx;
+
+	vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+	idx = vr_idx % vr->num;
+	vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide whether the desc is
+	 * done or not. Add a memory barrier here to make sure the update above
+	 * completes before updating the idx.
+	 */
+	mb();
+	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Get the total length of the descriptor chain starting at 'desc'. */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+				    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = vring->vq->vdev;
+	u32 len = 0, idx;
+
+	/* Follow the chain until a descriptor without the NEXT flag. */
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/*
+ * Release the pending packet if one is in progress; otherwise pull the
+ * next avail descriptor chain and release that instead.
+ */
+static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc_head;
+	u32 len = 0;
+
+	if (vring->desc_head) {
+		desc_head = vring->desc_head;
+		len = vring->pkt_len;
+	} else {
+		desc_head = mlxbf_tmfifo_get_next_desc(vring);
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+	}
+
+	if (desc_head)
+		mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+	/* Clear the pending-packet state. */
+	vring->pkt_len = 0;
+	vring->desc = NULL;
+	vring->desc_head = NULL;
+}
+
+/*
+ * Zero the virtio_net header at the start of a network buffer.
+ *
+ * The TmFifo byte stream carries no virtio_net_hdr, so a cleared header
+ * is synthesized in the descriptor buffer for the virtio-net stack.
+ * 'is_rx' is currently unused; the only caller invokes this on Rx.
+ */
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+				       struct vring_desc *desc, bool is_rx)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct virtio_net_hdr *net_hdr;
+
+	/* Descriptor addresses are physical; convert for CPU access. */
+	net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+	memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/*
+ * Get and initialize the next packet.
+ *
+ * Fetches the head descriptor of the next available chain and records it
+ * as both the chain head and the current working descriptor. For network
+ * Rx packets the virtio_net header is zeroed up front.
+ */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	struct vring_desc *desc;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+		mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+	vring->desc_head = desc;
+	vring->desc = desc;
+
+	return desc;
+}
+
+/*
+ * House-keeping timer.
+ *
+ * Periodically raises the Rx/Tx pending events and kicks the worker so
+ * the FIFO is polled even if no interrupt fires. test_and_set_bit()
+ * avoids re-scheduling the work when the event was already pending.
+ */
+static void mlxbf_tmfifo_timer(struct timer_list *t)
+{
+	struct mlxbf_tmfifo *fifo = container_of(t, struct mlxbf_tmfifo, timer);
+	int rx, tx;
+
+	rx = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+	tx = !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	if (rx || tx)
+		schedule_work(&fifo->work);
+
+	/* Re-arm for the next polling interval. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/*
+ * Copy one console packet into the output circular buffer.
+ *
+ * Walks the descriptor chain and appends each buffer to 'cons->tx_buf',
+ * splitting the copy in two when it wraps past the end of the circular
+ * buffer. The caller is expected to have verified there is enough free
+ * space (see mlxbf_tmfifo_console_output()).
+ */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+					    struct mlxbf_tmfifo_vring *vring,
+					    struct vring_desc *desc)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct virtio_device *vdev = &cons->vdev;
+	u32 len, idx, seg;
+	void *addr;
+
+	while (desc) {
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+		len = virtio32_to_cpu(vdev, desc->len);
+
+		/* Contiguous space up to the end of the circular buffer. */
+		seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+					MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len <= seg) {
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+		} else {
+			/* Wraps: copy the tail end, then the remainder. */
+			memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+			addr += seg;
+			memcpy(cons->tx_buf.buf, addr, len - seg);
+		}
+		cons->tx_buf.head = (cons->tx_buf.head + len) %
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+		/* Advance to the next descriptor of the chain, if any. */
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+}
+
+/*
+ * Copy console data into the output buffer.
+ *
+ * Drains all posted Tx descriptors into the console circular buffer.
+ * A packet that does not fit (keeping MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE
+ * bytes in reserve) is released untouched, i.e. console output is
+ * dropped rather than blocking the caller.
+ */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+					struct mlxbf_tmfifo_vring *vring)
+{
+	struct vring_desc *desc;
+	u32 len, avail;
+
+	desc = mlxbf_tmfifo_get_next_desc(vring);
+	while (desc) {
+		/* Release the packet if not enough space. */
+		len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+				   MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+			mlxbf_tmfifo_release_desc(vring, desc, len);
+			break;
+		}
+
+		mlxbf_tmfifo_console_output_one(cons, vring, desc);
+		mlxbf_tmfifo_release_desc(vring, desc, len);
+		desc = mlxbf_tmfifo_get_next_desc(vring);
+	}
+}
+
+/* Get the number of available words in Rx FIFO for receiving. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+	u64 status = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+
+	return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, status);
+}
+
+/*
+ * Get the number of words that may still be written into the Tx FIFO
+ * for sending, after subtracting the per-device reservation.
+ */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	int reserved;
+	u64 status;
+	u32 used;
+
+	/* Reserve some room in FIFO for console messages. */
+	reserved = (vdev_id == VIRTIO_ID_NET) ?
+		   fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO : 1;
+
+	status = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+	used = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, status);
+
+	return fifo->tx_fifo_size - reserved - used;
+}
+
+/*
+ * Console Tx (move data from the output buffer into the TmFifo).
+ *
+ * Writes one message (header word + payload words) into the Tx FIFO,
+ * bounded by 'avail' (the number of free FIFO words). Data is drained
+ * from the console circular buffer in 8-byte words; a word that wraps
+ * the buffer end is assembled from the two segments.
+ */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+	struct mlxbf_tmfifo_msg_hdr hdr;
+	struct mlxbf_tmfifo_vdev *cons;
+	unsigned long flags;
+	int size, seg;
+	void *addr;
+	u64 data;
+
+	/* Return if not enough space available. */
+	if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+		return;
+
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+	if (!cons || !cons->tx_buf.buf)
+		return;
+
+	/* Return if no data to send. */
+	size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+			MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+	if (size == 0)
+		return;
+
+	/* Adjust the size to available space. */
+	if (size + sizeof(hdr) > avail * sizeof(u64))
+		size = avail * sizeof(u64) - sizeof(hdr);
+
+	/* Write header. The 'len' field is in network byte order. */
+	hdr.type = VIRTIO_ID_CONSOLE;
+	hdr.len = htons(size);
+	writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+	/* Use spin-lock to protect the 'cons->tx_buf'. */
+	spin_lock_irqsave(&fifo->spin_lock, flags);
+
+	while (size > 0) {
+		addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+		/* Assemble a full 8-byte word, handling buffer wrap-around. */
+		seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+				      MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+		if (seg >= sizeof(u64)) {
+			memcpy(&data, addr, sizeof(u64));
+		} else {
+			memcpy(&data, addr, seg);
+			memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+			       sizeof(u64) - seg);
+		}
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+		/*
+		 * Advance the tail; the final word may contain fewer than 8
+		 * valid bytes, which the receiver trims using the header len.
+		 */
+		if (size >= sizeof(u64)) {
+			cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size -= sizeof(u64);
+		} else {
+			cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+				MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+			size = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->spin_lock, flags);
+}
+
+/*
+ * Rx/Tx one word (8 bytes) between the FIFO and the descriptor buffer.
+ *
+ * For Rx, one word is drained from the FIFO and stored at the current
+ * offset of the descriptor buffer; for Tx, the word is gathered from the
+ * buffer and pushed into the FIFO. 'cur_len' tracks the byte offset
+ * within the current descriptor; a partial trailing word only moves the
+ * remaining 'len - cur_len' bytes.
+ */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+				   struct vring_desc *desc,
+				   bool is_rx, int len)
+{
+	struct virtio_device *vdev = vring->vq->vdev;
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	void *addr;
+	u64 data;
+
+	/* Get the buffer address of this desc. */
+	addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+	/* Read a word from FIFO for Rx. */
+	if (is_rx)
+		data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+	if (vring->cur_len + sizeof(u64) <= len) {
+		/* The whole word. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data, sizeof(u64));
+		else
+			memcpy(&data, addr + vring->cur_len, sizeof(u64));
+		vring->cur_len += sizeof(u64);
+	} else {
+		/* Leftover bytes. */
+		if (is_rx)
+			memcpy(addr + vring->cur_len, &data,
+			       len - vring->cur_len);
+		else
+			memcpy(&data, addr + vring->cur_len,
+			       len - vring->cur_len);
+		vring->cur_len = len;
+	}
+
+	/* Write the word into FIFO for Tx. */
+	if (!is_rx)
+		writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+				     struct vring_desc *desc,
+				     bool is_rx, bool *vring_change)
+{
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_net_config *config;
+	struct mlxbf_tmfifo_msg_hdr hdr;
+	int vdev_id, hdr_len;
+
+	/* Read/Write packet header. */
+	if (is_rx) {
+		/* Drain one word from the FIFO. */
+		*(u64 *)&hdr = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+		/* Skip the length 0 packets (keepalive). */
+		if (hdr.len == 0)
+			return;
+
+		/* Check packet type. */
+		if (hdr.type == VIRTIO_ID_NET) {
+			vdev_id = VIRTIO_ID_NET;
+			hdr_len = sizeof(struct virtio_net_hdr);
+			config = &fifo->vdev[vdev_id]->config.net;
+			/*
+			 * NOTE(review): an oversized packet is dropped after
+			 * consuming only its header word; the payload words
+			 * remain in the FIFO and will later be parsed as new
+			 * headers. Confirm the peer never sends frames above
+			 * mtu + L2 overhead, or drain the payload here.
+			 */
+			if (ntohs(hdr.len) > config->mtu +
+			    MLXBF_TMFIFO_NET_L2_OVERHEAD)
+				return;
+		} else {
+			vdev_id = VIRTIO_ID_CONSOLE;
+			hdr_len = 0;
+		}
+
+		/*
+		 * Check whether the new packet still belongs to this vring.
+		 * If not, update the pkt_len of the new vring.
+		 */
+		if (vdev_id != vring->vdev_id) {
+			struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+			if (!tm_dev2)
+				return;
+			vring->desc = desc;
+			vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+			*vring_change = true;
+		}
+		/* Payload length plus the synthesized virtio_net header. */
+		vring->pkt_len = ntohs(hdr.len) + hdr_len;
+	} else {
+		/* Network virtio has an extra header. */
+		hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+			   sizeof(struct virtio_net_hdr) : 0;
+		vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+		hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+			    VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+		hdr.len = htons(vring->pkt_len - hdr_len);
+		writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+	}
+
+	/* Start the payload after the (possibly zero-length) net header. */
+	vring->cur_len = hdr_len;
+	vring->rem_len = vring->pkt_len;
+	/* Mark this vring as owning the FIFO direction until pkt is done. */
+	fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Advances the per-vring packet state machine by at most one FIFO word:
+ * either the packet header or one payload word. Decrements '*avail' for
+ * each word consumed/produced.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+				       bool is_rx, int *avail)
+{
+	const struct vring *vr = virtqueue_get_vring(vring->vq);
+	struct mlxbf_tmfifo *fifo = vring->fifo;
+	struct virtio_device *vdev;
+	bool vring_change = false;
+	struct vring_desc *desc;
+	unsigned long flags;
+	u32 len, idx;
+
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+	/* Get the descriptor of the next packet. */
+	if (!vring->desc) {
+		desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+		if (!desc)
+			return false;
+	} else {
+		desc = vring->desc;
+	}
+
+	/* Beginning of a packet. Start to Rx/Tx packet header. */
+	if (vring->pkt_len == 0) {
+		mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
+		(*avail)--;
+
+		/* Return if new packet is for another ring. */
+		if (vring_change)
+			return false;
+		goto mlxbf_tmfifo_desc_done;
+	}
+
+	/* Get the length of this desc. */
+	len = virtio32_to_cpu(vdev, desc->len);
+	if (len > vring->rem_len)
+		len = vring->rem_len;
+
+	/* Rx/Tx one word (8 bytes) if not done. */
+	if (vring->cur_len < len) {
+		mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+		(*avail)--;
+	}
+
+	/* Check again whether it's done. */
+	if (vring->cur_len == len) {
+		vring->cur_len = 0;
+		vring->rem_len -= len;
+
+		/* Get the next desc on the chain. */
+		if (vring->rem_len > 0 &&
+		    (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+			goto mlxbf_tmfifo_desc_done;
+		}
+
+		/* Done and release the pending packet. */
+		mlxbf_tmfifo_release_pending_pkt(vring);
+		desc = NULL;
+		/* Release FIFO ownership for this direction. */
+		fifo->vring[is_rx] = NULL;
+
+		/* Notify upper layer that packet is done. */
+		spin_lock_irqsave(&fifo->spin_lock, flags);
+		vring_interrupt(0, vring->vq);
+		spin_unlock_irqrestore(&fifo->spin_lock, flags);
+	}
+
+mlxbf_tmfifo_desc_done:
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	return true;
+}
+
+/*
+ * Rx & Tx processing of a queue.
+ *
+ * Loops moving words between the vring and the FIFO until either the
+ * FIFO runs out of space/data or the vring has no more descriptors.
+ * Only one vring per direction may be active at a time (a packet may
+ * span multiple FIFO words).
+ */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+	int avail = 0, devid = vring->vdev_id;
+	struct mlxbf_tmfifo *fifo;
+	bool more;
+
+	fifo = vring->fifo;
+
+	/* Return if vdev is not ready. */
+	if (!fifo->vdev[devid])
+		return;
+
+	/* Return if another vring is running. */
+	if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+		return;
+
+	/* Only handle console and network for now. */
+	if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+		return;
+
+	do {
+		/* Get available FIFO space. */
+		if (avail == 0) {
+			if (is_rx)
+				avail = mlxbf_tmfifo_get_rx_avail(fifo);
+			else
+				avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+			if (avail <= 0)
+				break;
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+			mlxbf_tmfifo_console_tx(fifo, avail);
+			break;
+		}
+
+		/* Handle one descriptor. */
+		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+	} while (more);
+}
+
+/*
+ * Handle Rx or Tx queues.
+ *
+ * Runs one Rx or Tx pass over the given queue of every registered vdev,
+ * but only if the corresponding event bit is pending and its IRQ is
+ * still valid. Clearing the bit here re-arms it for the next interrupt
+ * or timer tick.
+ */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+				   int irq_id, bool is_rx)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo_vring *vring;
+	int i;
+
+	if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+	    !fifo->irq_info[irq_id].irq)
+		return;
+
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+		tm_vdev = fifo->vdev[i];
+		if (tm_vdev) {
+			vring = &tm_vdev->vrings[queue_id];
+			if (vring->vq)
+				mlxbf_tmfifo_rxtx(vring, is_rx);
+		}
+	}
+}
+
+/*
+ * Work handler for Rx and Tx case.
+ *
+ * Serializes all FIFO processing under 'fifo->lock'. Tx is serviced
+ * before Rx on each run.
+ */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+	struct mlxbf_tmfifo *fifo;
+
+	fifo = container_of(work, struct mlxbf_tmfifo, work);
+	/* Ignore events that arrive before probe finished or after cleanup. */
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx (Send data to the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+			       MLXBF_TM_TX_LWM_IRQ, false);
+
+	/* Rx (Receive data from the TmFifo). */
+	mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+			       MLXBF_TM_RX_HWM_IRQ, true);
+
+	mutex_unlock(&fifo->lock);
+}
+
+/*
+ * The notify function is called when new buffers are posted.
+ *
+ * Console Tx is copied into its circular buffer immediately (callers may
+ * have interrupts disabled); everything else is deferred to the worker
+ * by raising the matching pending-event bit.
+ */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct mlxbf_tmfifo_vring *vring = vq->priv;
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+	struct mlxbf_tmfifo *fifo;
+	unsigned long flags;
+
+	fifo = vring->fifo;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (vring->index & BIT(0)) {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+			mlxbf_tmfifo_console_output(tm_vdev, vring);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+		} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+					    &fifo->pend_events)) {
+			/* Event already pending; work is scheduled. */
+			return true;
+		}
+	} else {
+		if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+			return true;
+	}
+
+	schedule_work(&fifo->work);
+
+	return true;
+}
+
+/* Return the feature bits supported by this device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	return mlxbf_vdev_to_tmfifo(vdev)->features;
+}
+
+/* Confirm device features to use: record what the core negotiated. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_dev = mlxbf_vdev_to_tmfifo(vdev);
+
+	tm_dev->features = vdev->features;
+	return 0;
+}
+
+/*
+ * Free virtqueues found by find_vqs().
+ *
+ * Releases any in-flight packet on each vring, then deletes the
+ * virtqueue. The vring backing memory itself is freed separately by
+ * mlxbf_tmfifo_free_vrings().
+ */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc)
+			mlxbf_tmfifo_release_pending_pkt(vring);
+		vq = vring->vq;
+		if (vq) {
+			/* Clear the pointer before deleting the queue. */
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/*
+ * Create and initialize the virtual queues.
+ *
+ * Builds one virtqueue per requested queue on top of the preallocated
+ * vring memory ('vring->va'). On any failure, all queues created so far
+ * are torn down via mlxbf_tmfifo_virtio_del_vqs().
+ */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+					unsigned int nvqs,
+					struct virtqueue *vqs[],
+					vq_callback_t *callbacks[],
+					const char * const names[],
+					const bool *ctx,
+					struct irq_affinity *desc)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+	struct mlxbf_tmfifo_vring *vring;
+	struct virtqueue *vq;
+	int i, ret, size;
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			ret = -EINVAL;
+			goto error;
+		}
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->num, vring->align);
+		memset(vring->va, 0, size);
+		/*
+		 * NOTE(review): 'false, false' presumably disables weak
+		 * barriers and per-buffer context — confirm against the
+		 * vring_new_virtqueue() signature of this kernel version.
+		 */
+		vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+					 false, false, vring->va,
+					 mlxbf_tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		/* Link the queue and the vring both ways. */
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	mlxbf_tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the device status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	return mlxbf_vdev_to_tmfifo(vdev)->status;
+}
+
+/* Write the device status byte. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+					   u8 status)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->status = status;
+}
+
+/* Reset the device. Only the status byte is cleared for now. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	mlxbf_vdev_to_tmfifo(vdev)->status = 0;
+}
+
+/* Read the value of a configuration field. Out-of-range reads are ignored. */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+				    unsigned int offset,
+				    void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	/* Widen to u64 so 'offset + len' cannot wrap around. */
+	if ((u64)offset + len <= sizeof(tm_vdev->config))
+		memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. Out-of-range writes are ignored. */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+				    unsigned int offset,
+				    const void *buf,
+				    unsigned int len)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+	/* Widen to u64 so 'offset + len' cannot wrap around. */
+	if ((u64)offset + len <= sizeof(tm_vdev->config))
+		memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Device release callback: frees the containing tmfifo vdev structure. */
+static void tmfifo_virtio_dev_release(struct device *device)
+{
+	struct virtio_device *vdev =
+			container_of(device, struct virtio_device, dev);
+
+	kfree(mlxbf_vdev_to_tmfifo(vdev));
+}
+
+/* Virtio config operations implemented by this transport. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+	.get_features = mlxbf_tmfifo_virtio_get_features,
+	.finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+	.find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+	.del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+	.reset = mlxbf_tmfifo_virtio_reset,
+	.set_status = mlxbf_tmfifo_virtio_set_status,
+	.get_status = mlxbf_tmfifo_virtio_get_status,
+	.get = mlxbf_tmfifo_virtio_get,
+	.set = mlxbf_tmfifo_virtio_set,
+};
+
+/*
+ * Create vdev for the FIFO.
+ *
+ * Allocates and registers one virtio device (console or network) on top
+ * of the shared FIFO. 'config'/'size' optionally seed the device config
+ * space; 'features' is the advertised virtio feature set.
+ *
+ * Returns 0 on success or a negative errno. On failure all partially
+ * created state is unwound (vrings freed, vdev slot cleared, memory
+ * released via put_device() once the device object was initialized).
+ */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+				    struct mlxbf_tmfifo *fifo,
+				    int vdev_id, u64 features,
+				    void *config, u32 size)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
+	int ret;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		dev_err(dev, "vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto fail;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+
+	if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+		dev_err(dev, "unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto vdev_fail;
+	}
+
+	/* Allocate an output buffer for the console device. */
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+						   MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+						   GFP_KERNEL);
+		/*
+		 * Fail the creation on allocation error instead of silently
+		 * registering a console that can never transmit.
+		 */
+		if (!tm_vdev->tx_buf.buf) {
+			ret = -ENOMEM;
+			goto vdev_fail;
+		}
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	reg_dev = tm_vdev;
+	if (ret) {
+		dev_err(dev, "register_virtio_device failed\n");
+		goto vdev_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+vdev_fail:
+	mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+	fifo->vdev[vdev_id] = NULL;
+	if (reg_dev)
+		put_device(&tm_vdev->vdev.dev);
+	else
+		kfree(tm_vdev);
+fail:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/*
+ * Delete vdev for the FIFO.
+ *
+ * Unregisters the virtio device (if present) and frees its vrings.
+ * Safe to call for a slot that was never created. Always returns 0.
+ */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+	struct mlxbf_tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/*
+ * Read the configured network MAC address from efi variable.
+ *
+ * Falls back to the driver's default MAC when the variable is missing
+ * or has an unexpected size.
+ */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+	unsigned long size = ETH_ALEN;
+	u8 buf[ETH_ALEN];
+	efi_status_t rc;
+
+	rc = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size, buf);
+	if (rc == EFI_SUCCESS && size == ETH_ALEN)
+		ether_addr_copy(mac, buf);
+	else
+		ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
+}
+
+/*
+ * Set TmFifo thresholds which are used to trigger interrupts.
+ *
+ * Also records the hardware-reported Tx/Rx FIFO sizes. Tx: low watermark
+ * at half the FIFO, high watermark at size - 1. Rx: low watermark 0,
+ * high watermark 1 (interrupt as soon as any word arrives).
+ */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+	u64 ctl;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+			   fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+			   fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+/*
+ * Tear down the FIFO: stop the timer, mask IRQs, cancel the worker and
+ * delete every registered vdev. 'is_ready' is cleared first so a
+ * late-running work handler exits immediately.
+ */
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+	int i;
+
+	fifo->is_ready = false;
+	del_timer_sync(&fifo->timer);
+	mlxbf_tmfifo_disable_irqs(fifo);
+	cancel_work_sync(&fifo->work);
+	for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+		mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/*
+ * Probe the TMFIFO.
+ *
+ * Maps the Rx/Tx FIFO register windows, requests the interrupts,
+ * programs the FIFO watermarks and creates the console and network
+ * virtio devices. The house-keeping timer is started last and
+ * 'is_ready' is set only after everything succeeded.
+ *
+ * Returns 0 on success or a negative errno (partial state is cleaned
+ * up via mlxbf_tmfifo_cleanup() where needed).
+ */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+	struct virtio_net_config net_config;
+	struct device *dev = &pdev->dev;
+	struct mlxbf_tmfifo *fifo;
+	int i, rc;
+
+	fifo = devm_kzalloc(dev, sizeof(*fifo), GFP_KERNEL);
+	if (!fifo)
+		return -ENOMEM;
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+	mutex_init(&fifo->lock);
+
+	/* Get the resource of the Rx FIFO. */
+	fifo->rx_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(fifo->rx_base))
+		return PTR_ERR(fifo->rx_base);
+
+	/* Get the resource of the Tx FIFO. */
+	fifo->tx_base = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(fifo->tx_base))
+		return PTR_ERR(fifo->tx_base);
+
+	platform_set_drvdata(pdev, fifo);
+
+	timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+	for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		/*
+		 * platform_get_irq() returns a negative errno on failure;
+		 * propagate it instead of requesting an invalid IRQ number.
+		 */
+		rc = platform_get_irq(pdev, i);
+		if (rc < 0)
+			return rc;
+		fifo->irq_info[i].irq = rc;
+		rc = devm_request_irq(dev, fifo->irq_info[i].irq,
+				      mlxbf_tmfifo_irq_handler, 0,
+				      "tmfifo", &fifo->irq_info[i]);
+		if (rc) {
+			dev_err(dev, "devm_request_irq failed\n");
+			fifo->irq_info[i].irq = 0;
+			return rc;
+		}
+	}
+
+	mlxbf_tmfifo_set_threshold(fifo);
+
+	/* Create the console vdev. */
+	rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (rc)
+		goto fail;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = ETH_DATA_LEN;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+	rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET,
+				      MLXBF_TMFIFO_NET_FEATURES, &net_config,
+				      sizeof(net_config));
+	if (rc)
+		goto fail;
+
+	/* Start the house-keeping timer. */
+	mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+	fifo->is_ready = true;
+	return 0;
+
+fail:
+	mlxbf_tmfifo_cleanup(fifo);
+	return rc;
+}
+
+/* Device remove function: tear down timer, IRQs, worker and vdevs. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+	mlxbf_tmfifo_cleanup(platform_get_drvdata(pdev));
+
+	return 0;
+}
+
+/* ACPI IDs this driver binds to. */
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+/* Platform driver glue. */
+static struct platform_driver mlxbf_tmfifo_driver = {
+	.probe = mlxbf_tmfifo_probe,
+	.remove = mlxbf_tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.acpi_match_table = mlxbf_tmfifo_acpi_match,
+	},
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 179+ messages in thread

* Re: [PATCH v16] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc
  2019-05-03 13:49 ` [PATCH v16] " Liming Sun
@ 2019-05-06  9:13   ` Andy Shevchenko
  0 siblings, 0 replies; 179+ messages in thread
From: Andy Shevchenko @ 2019-05-06  9:13 UTC (permalink / raw)
  To: Liming Sun
  Cc: David Woods, Andy Shevchenko, Darren Hart, Vadim Pasternak,
	Linux Kernel Mailing List, Platform Driver

On Fri, May 3, 2019 at 4:49 PM Liming Sun <lsun@mellanox.com> wrote:
>
> This commit adds the TmFifo platform driver for Mellanox BlueField
> Soc. TmFifo is a shared FIFO which enables external host machine
> to exchange data with the SoC via USB or PCIe. The driver is based
> on virtio framework and has console and network access enabled.
>

Pushed to my review and testing queue, thanks!

> Reviewed-by: Vadim Pasternak <vadimp@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
> v15->v16:
>     Rebase and resubmit (no new changes).
> v14->v15:
>     Fixes for comments from Andy:
>     - Remove the 'union' definition of mlxbf_tmfifo_msg_hdr and use
>       on-the-fly conversion when sending the 8-byte message header
>       into the FIFO;
>     - Update comment of mlxbf_tmfifo_msg_hdr explaining why '__be16'
>       is needed for the 'len' field. The SoC sends data stream into
>       the FIFO and the other side reads it. The byte order of the data
>       stream (byte-stream) stays the same. The 'len' field is encoded
>       into network byte order so upper-level applications in external
>       host machine with different endianness could decode it. This
>       implementation was verified over USB with an external PPC host
>       machine running in big-endian mode.
>     - Move the 'dev_err()' line to the end of the block in function
>       mlxbf_tmfifo_alloc_vrings();
>     - Remove the 'irq_info->index < MLXBF_TM_MAX_IRQ' check in
>       mlxbf_tmfifo_irq_handler() since it's unnecessary;
>     - Remove the 'if (desc_head)' check in
>       mlxbf_tmfifo_release_pending_pkt() since function
>       mlxbf_tmfifo_get_pkt_len() is already NULL-aware;
>     - Adjust the testing order of 'if (!(vring->index & BIT(0)))'
>       in bool mlxbf_tmfifo_virtio_notify() to test the positive case
>       'if (vring->index & BIT(0))' first;
>     - Add '(u64)offset' conversion in mlxbf_tmfifo_virtio_get() to
>       avoid 32-bit length addition overflow;
>     - Update the 'efi.get_variable' statement into single line in
>       mlxbf_tmfifo_get_cfg_mac();
>     - Use new helper devm_platform_ioremap_resource() to replace
>       'platform_get_resource() + devm_ioremap_resource()' in
>       mlxbf_tmfifo_probe();
> v13->v14:
>     Fixes for comments from Andy:
>     - Add a blank line to separate the virtio header files;
>     - Update the comment for 'union mlxbf_tmfifo_msg_hdr' to be
>       more clear how this union is used;
>     - Update the 'mlxbf_tmfifo_net_default_mac[ETH_ALEN]' definition
>       to be two lines;
>     - Reformat macro MLXBF_TMFIFO_NET_FEATURES to put the definition
>       in a separate line;
>     - Update all 'fifo' to 'FIFO' in the comments;
>     - Update mlxbf_tmfifo_alloc_vrings() to specifically release the
>       allocated entries in case of failures, so the logic looks more
>       clear. In the caller function the mlxbf_tmfifo_free_vrings()
>       might be called again in case of other failures, which is ok
>       since the 'va' pointer will be set to NULL once released;
>     - Update mlxbf_tmfifo_timer() to change the first statement to
>       one line;
>     - Update one memcpy() to ether_addr_copy() in
>       mlxbf_tmfifo_get_cfg_mac();
>     - Remove 'fifo->pdev' since it is really not needed;
>     - Define temporary variable to update the mlxbf_tmfifo_create_vdev()
>       statement into single line.
>     New changes by Liming:
>     - Reorder the logic a little bit in mlxbf_tmfifo_timer(). Previously
>       it has logic like "!a || !b" while the '!b' will not be evaluated
>       if '!a' is true. It was changed to this way during review, but is
>       actually not the desired behavior since both bits need to be
>       tested/set in fifo->pend_events. This issue was found during
>       verification which caused extra delays for Tx packets.
> v12->v13:
>     Rebase and resubmit (no new changes).
> v11->v12:
>     Fixed the two unsolved comments from v11.
>     - "Change macro mlxbf_vdev_to_tmfifo() to one line"
>       Done. Seems not hard.
>     - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
>       Yes, understand the comment now. The tmfifo is fixed, but the
>       vdev is dynamic. Use kzalloc() instead, and free the device
>       in the release callback which is the right place for it.
> v10->v11:
>     Fixes for comments from Andy:
>     - Use GENMASK_ULL() instead of GENMASK() in mlxbf-tmfifo-regs.h
>     - Removed the cpu_to_le64()/le64_to_cpu() conversion since
>       readq()/writeq() already takes care of it.
>     - Remove the "if (irq)" check in mlxbf_tmfifo_disable_irqs().
>     - Add "u32 count" temp variable in mlxbf_tmfifo_get_tx_avail().
>     - Clean up mlxbf_tmfifo_get_cfg_mac(), use ETH_ALEN instead of
>       value 6.
>     - Change the tx_buf to use Linux existing 'struct circ_buf'.
>     Comment not applied:
>     - "Change macro mlxbf_vdev_to_tmfifo() to one line"
>       Couldn't fit in one line with 80 characters
>     - "Is it appropriate use of devm_* for 'tm_vdev = devm_kzalloc'"
>       This is SoC, the device won't be closed or detached.
>       The only case is when the driver is unloaded. So it appears
>       ok to use devm_kzalloc() since it's allocated during probe()
>       and released during module unload.
>     Comments from Vadim: OK
> v9->v10:
>     Fixes for comments from Andy:
>     - Use devm_ioremap_resource() instead of devm_ioremap().
>     - Use kernel-doc comments.
>     - Keep Makefile contents sorted.
>     - Use same fixed format for offsets.
>     - Use SZ_1K/SZ_32K instead of 1024/23*1024.
>     - Remove unnecessary comments.
>     - Use one style for max numbers.
>     - More comments for mlxbf_tmfifo_vdev and mlxbf_tmfifo_data_64bit.
>     - Use globally defined MTU instead of new definition.
>     - Remove forward declaration of mlxbf_tmfifo_remove().
>     - Remove PAGE_ALIGN() for dma_alloc_coherent)().
>     - Remove the cast of "struct vring *".
>     - Check return result of test_and_set_bit().
>     - Add a macro mlxbt_vdev_to_tmfifo().
>     - Several other minor coding style comments.
>     Comment not applied:
>     - "Shouldn't be rather helper in EFI lib in kernel"
>       Looks like efi.get_variable() is the way I found in the kernel
>       tree.
>     - "this one is not protected anyhow? Potential race condition"
>       In mlxbf_tmfifo_console_tx(), the spin-lock is used to protect the
>       'tx_buf' only, not the FIFO writes. So there is no race condition.
>     - "Is __packed needed in mlxbf_tmfifo_msg_hdr".
>       Yes, it is needed to make sure the structure is 8 bytes.
>     Fixes for comments from Vadim:
>     - Use tab in mlxbf-tmfifo-regs.h
>     - Use kernel-doc comments for struct mlxbf_tmfifo_msg_hdr and
>       mlxbf_tmfifo_irq_info as well.
>     - Use _MAX instead of _CNT in the macro definition to be consistent.
>     - Fix the MODULE_LICENSE.
>     - Use BIT_ULL() instead of BIT().
>     - Remove argument of 'avail' for mlxbf_tmfifo_rxtx_header() and
>       mlxbf_tmfifo_rxtx_word()
>     - Revise logic in mlxbf_tmfifo_rxtx_one_desc() to remove the
>       WARN_ON().
>     - Change "union mlxbf_tmfifo_u64 u" to "union mlxbf_tmfifo_u64 buf"
>       in mlxbf_tmfifo_rxtx_word().
>     - Change data type of vring_change from 'int' to 'bool'.
>     - Remove the blank lines after Signed-off.
>     - Don’t use declaration in the middle.
>     - Make the network header initialization in some more elegant way.
>     - Change label done to mlxbf_tmfifo_desc_done.
>     - Remove some unnecessary comments, and several other misc coding
>       style comments.
>     - Simplify code logic in mlxbf_tmfifo_virtio_notify()
>     New changes by Liming:
>     - Simplify the Rx/Tx function arguments to make it more readable.
> v8->v9:
>     Fixes for comments from Andy:
>     - Use modern devm_xxx() API instead.
>     Fixes for comments from Vadim:
>     - Split the Rx/Tx function into smaller functions.
>     - File name, copyright information.
>     - Function and variable name conversion.
>     - Local variable and indent coding styles.
>     - Remove unnecessary 'inline' declarations.
>     - Use devm_xxx() APIs.
>     - Move the efi_char16_t MAC address definition to global.
>     - Fix warnings reported by 'checkpatch --strict'.
>     - Fix warnings reported by 'make CF="-D__CHECK_ENDIAN__"'.
>     - Change select VIRTIO_xxx to depends on VIRTIO_xxx in Kconfig.
>     - Merge mlxbf_tmfifo_vdev_tx_buf_push() and
>       mlxbf_tmfifo_vdev_tx_buf_pop().
>     - Add union to avoid casting between __le64 and u64.
>     - Several other misc coding style comments.
>     New changes by Liming:
>     - Removed the DT binding documentation since only ACPI is
>       supported for now by UEFI on the SoC.
> v8: Re-submit under drivers/platform/mellanox for the target-side
>     platform driver only.
> v7: Added host side drivers into the same patch set.
> v5~v6: Coding style fix.
> v1~v4: Initial version for directory drivers/soc/mellanox.
> ---
>  drivers/platform/mellanox/Kconfig             |   12 +-
>  drivers/platform/mellanox/Makefile            |    1 +
>  drivers/platform/mellanox/mlxbf-tmfifo-regs.h |   63 ++
>  drivers/platform/mellanox/mlxbf-tmfifo.c      | 1281 +++++++++++++++++++++++++
>  4 files changed, 1356 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo-regs.h
>  create mode 100644 drivers/platform/mellanox/mlxbf-tmfifo.c
>
> diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
> index cd8a908..530fe7e 100644
> --- a/drivers/platform/mellanox/Kconfig
> +++ b/drivers/platform/mellanox/Kconfig
> @@ -5,7 +5,7 @@
>
>  menuconfig MELLANOX_PLATFORM
>         bool "Platform support for Mellanox hardware"
> -       depends on X86 || ARM || COMPILE_TEST
> +       depends on X86 || ARM || ARM64 || COMPILE_TEST
>         ---help---
>           Say Y here to get to see options for platform support for
>           Mellanox systems. This option alone does not add any kernel code.
> @@ -34,4 +34,14 @@ config MLXREG_IO
>           to system resets operation, system reset causes monitoring and some
>           kinds of mux selection.
>
> +config MLXBF_TMFIFO
> +       tristate "Mellanox BlueField SoC TmFifo platform driver"
> +       depends on ARM64
> +       depends on ACPI
> +       depends on VIRTIO_CONSOLE && VIRTIO_NET
> +       help
> +         Say y here to enable TmFifo support. The TmFifo driver provides
> +          platform driver support for the TmFifo which supports console
> +          and networking based on the virtio framework.
> +
>  endif # MELLANOX_PLATFORM
> diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
> index 57074d9c..a229bda1 100644
> --- a/drivers/platform/mellanox/Makefile
> +++ b/drivers/platform/mellanox/Makefile
> @@ -3,5 +3,6 @@
>  # Makefile for linux/drivers/platform/mellanox
>  # Mellanox Platform-Specific Drivers
>  #
> +obj-$(CONFIG_MLXBF_TMFIFO)     += mlxbf-tmfifo.o
>  obj-$(CONFIG_MLXREG_HOTPLUG)   += mlxreg-hotplug.o
>  obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> new file mode 100644
> index 0000000..e4f0d2e
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
> @@ -0,0 +1,63 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
> + */
> +
> +#ifndef __MLXBF_TMFIFO_REGS_H__
> +#define __MLXBF_TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +#include <linux/bits.h>
> +
> +#define MLXBF_TMFIFO_TX_DATA                           0x00
> +#define MLXBF_TMFIFO_TX_STS                            0x08
> +#define MLXBF_TMFIFO_TX_STS__LENGTH                    0x0001
> +#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT               0
> +#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH               9
> +#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL           0
> +#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK               GENMASK_ULL(8, 0)
> +#define MLXBF_TMFIFO_TX_STS__COUNT_MASK                        GENMASK_ULL(8, 0)
> +#define MLXBF_TMFIFO_TX_CTL                            0x10
> +#define MLXBF_TMFIFO_TX_CTL__LENGTH                    0x0001
> +#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT                 0
> +#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH                 8
> +#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL             128
> +#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK                 GENMASK_ULL(7, 0)
> +#define MLXBF_TMFIFO_TX_CTL__LWM_MASK                  GENMASK_ULL(7, 0)
> +#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT                 8
> +#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH                 8
> +#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL             128
> +#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK                 GENMASK_ULL(7, 0)
> +#define MLXBF_TMFIFO_TX_CTL__HWM_MASK                  GENMASK_ULL(15, 8)
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT         32
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH         9
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL     256
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK         GENMASK_ULL(8, 0)
> +#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)
> +#define MLXBF_TMFIFO_RX_DATA                           0x00
> +#define MLXBF_TMFIFO_RX_STS                            0x08
> +#define MLXBF_TMFIFO_RX_STS__LENGTH                    0x0001
> +#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT               0
> +#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH               9
> +#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL           0
> +#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK               GENMASK_ULL(8, 0)
> +#define MLXBF_TMFIFO_RX_STS__COUNT_MASK                        GENMASK_ULL(8, 0)
> +#define MLXBF_TMFIFO_RX_CTL                            0x10
> +#define MLXBF_TMFIFO_RX_CTL__LENGTH                    0x0001
> +#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT                 0
> +#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH                 8
> +#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL             128
> +#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK                 GENMASK_ULL(7, 0)
> +#define MLXBF_TMFIFO_RX_CTL__LWM_MASK                  GENMASK_ULL(7, 0)
> +#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT                 8
> +#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH                 8
> +#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL             128
> +#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK                 GENMASK_ULL(7, 0)
> +#define MLXBF_TMFIFO_RX_CTL__HWM_MASK                  GENMASK_ULL(15, 8)
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT         32
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH         9
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL     256
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK         GENMASK_ULL(8, 0)
> +#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK          GENMASK_ULL(40, 32)
> +
> +#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
> new file mode 100644
> index 0000000..9a5c9fd
> --- /dev/null
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -0,0 +1,1281 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Mellanox BlueField SoC TmFifo driver
> + *
> + * Copyright (C) 2019 Mellanox Technologies
> + */
> +
> +#include <linux/acpi.h>
> +#include <linux/bitfield.h>
> +#include <linux/circ_buf.h>
> +#include <linux/efi.h>
> +#include <linux/irq.h>
> +#include <linux/module.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/types.h>
> +
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_console.h>
> +#include <linux/virtio_ids.h>
> +#include <linux/virtio_net.h>
> +#include <linux/virtio_ring.h>
> +
> +#include "mlxbf-tmfifo-regs.h"
> +
> +/* Vring size. */
> +#define MLXBF_TMFIFO_VRING_SIZE                        SZ_1K
> +
> +/* Console Tx buffer size. */
> +#define MLXBF_TMFIFO_CON_TX_BUF_SIZE           SZ_32K
> +
> +/* Console Tx buffer reserved space. */
> +#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE       8
> +
> +/* House-keeping timer interval. */
> +#define MLXBF_TMFIFO_TIMER_INTERVAL            (HZ / 10)
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define MLXBF_TMFIFO_VDEV_MAX          (VIRTIO_ID_CONSOLE + 1)
> +
> +/*
> + * Reserve 1/16 of TmFifo space, so console messages are not starved by
> + * the networking traffic.
> + */
> +#define MLXBF_TMFIFO_RESERVE_RATIO             16
> +
> +/* Message with data needs at least two words (for header & data). */
> +#define MLXBF_TMFIFO_DATA_MIN_WORDS            2
> +
> +struct mlxbf_tmfifo;
> +
> +/**
> + * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
> + * @va: virtual address of the ring
> + * @dma: dma address of the ring
> + * @vq: pointer to the virtio virtqueue
> + * @desc: current descriptor of the pending packet
> + * @desc_head: head descriptor of the pending packet
> + * @cur_len: processed length of the current descriptor
> + * @rem_len: remaining length of the pending packet
> + * @pkt_len: total length of the pending packet
> + * @next_avail: next avail descriptor id
> + * @num: vring size (number of descriptors)
> + * @align: vring alignment size
> + * @index: vring index
> + * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> + * @fifo: pointer to the tmfifo structure
> + */
> +struct mlxbf_tmfifo_vring {
> +       void *va;
> +       dma_addr_t dma;
> +       struct virtqueue *vq;
> +       struct vring_desc *desc;
> +       struct vring_desc *desc_head;
> +       int cur_len;
> +       int rem_len;
> +       u32 pkt_len;
> +       u16 next_avail;
> +       int num;
> +       int align;
> +       int index;
> +       int vdev_id;
> +       struct mlxbf_tmfifo *fifo;
> +};
> +
> +/* Interrupt types. */
> +enum {
> +       MLXBF_TM_RX_LWM_IRQ,
> +       MLXBF_TM_RX_HWM_IRQ,
> +       MLXBF_TM_TX_LWM_IRQ,
> +       MLXBF_TM_TX_HWM_IRQ,
> +       MLXBF_TM_MAX_IRQ
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +       MLXBF_TMFIFO_VRING_RX,
> +       MLXBF_TMFIFO_VRING_TX,
> +       MLXBF_TMFIFO_VRING_MAX
> +};
> +
> +/**
> + * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
> + * @vdev: virtio device, in which the vdev.id.device field has the
> + *        VIRTIO_ID_xxx id to distinguish the virtual device.
> + * @status: status of the device
> + * @features: supported features of the device
> + * @vrings: array of tmfifo vrings of this device
> + * @config.cons: virtual console config -
> + *               select if vdev.id.device is VIRTIO_ID_CONSOLE
> + * @config.net: virtual network config -
> + *              select if vdev.id.device is VIRTIO_ID_NET
> + * @tx_buf: tx buffer used to buffer data before writing into the FIFO
> + */
> +struct mlxbf_tmfifo_vdev {
> +       struct virtio_device vdev;
> +       u8 status;
> +       u64 features;
> +       struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
> +       union {
> +               struct virtio_console_config cons;
> +               struct virtio_net_config net;
> +       } config;
> +       struct circ_buf tx_buf;
> +};
> +
> +/**
> + * mlxbf_tmfifo_irq_info - Structure of the interrupt information
> + * @fifo: pointer to the tmfifo structure
> + * @irq: interrupt number
> + * @index: index into the interrupt array
> + */
> +struct mlxbf_tmfifo_irq_info {
> +       struct mlxbf_tmfifo *fifo;
> +       int irq;
> +       int index;
> +};
> +
> +/**
> + * mlxbf_tmfifo - Structure of the TmFifo
> + * @vdev: array of the virtual devices running over the TmFifo
> + * @lock: lock to protect the TmFifo access
> + * @rx_base: mapped register base address for the Rx FIFO
> + * @tx_base: mapped register base address for the Tx FIFO
> + * @rx_fifo_size: number of entries of the Rx FIFO
> + * @tx_fifo_size: number of entries of the Tx FIFO
> + * @pend_events: pending bits for deferred events
> + * @irq_info: interrupt information
> + * @work: work struct for deferred process
> + * @timer: background timer
> + * @vring: Tx/Rx ring
> + * @spin_lock: spin lock
> + * @is_ready: ready flag
> + */
> +struct mlxbf_tmfifo {
> +       struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
> +       struct mutex lock;              /* TmFifo lock */
> +       void __iomem *rx_base;
> +       void __iomem *tx_base;
> +       int rx_fifo_size;
> +       int tx_fifo_size;
> +       unsigned long pend_events;
> +       struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
> +       struct work_struct work;
> +       struct timer_list timer;
> +       struct mlxbf_tmfifo_vring *vring[2];
> +       spinlock_t spin_lock;           /* spin lock */
> +       bool is_ready;
> +};
> +
> +/**
> + * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
> + * @type: message type
> + * @len: payload length in network byte order. Messages sent into the FIFO
> + *       will be read by the other side as data stream in the same byte order.
> + *       The length needs to be encoded into network order so both sides
> + *       could understand it.
> + */
> +struct mlxbf_tmfifo_msg_hdr {
> +       u8 type;
> +       __be16 len;
> +       u8 unused[5];
> +} __packed __aligned(sizeof(u64));
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
> +       0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01
> +};
> +
> +/* EFI variable name of the MAC address. */
> +static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
> +
> +/* Maximum L2 header length. */
> +#define MLXBF_TMFIFO_NET_L2_OVERHEAD   36
> +
> +/* Supported virtio-net features. */
> +#define MLXBF_TMFIFO_NET_FEATURES \
> +       (BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_STATUS) | \
> +        BIT_ULL(VIRTIO_NET_F_MAC))
> +
> +#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
> +
> +/* Free vrings of the FIFO device. */
> +static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
> +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       int i, size;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +               if (vring->va) {
> +                       size = vring_size(vring->num, vring->align);
> +                       dma_free_coherent(tm_vdev->vdev.dev.parent, size,
> +                                         vring->va, vring->dma);
> +                       vring->va = NULL;
> +                       if (vring->vq) {
> +                               vring_del_virtqueue(vring->vq);
> +                               vring->vq = NULL;
> +                       }
> +               }
> +       }
> +}
> +
> +/* Allocate vrings for the FIFO. */
> +static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
> +                                    struct mlxbf_tmfifo_vdev *tm_vdev)
> +{
> +       struct mlxbf_tmfifo_vring *vring;
> +       struct device *dev;
> +       dma_addr_t dma;
> +       int i, size;
> +       void *va;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +               vring->fifo = fifo;
> +               vring->num = MLXBF_TMFIFO_VRING_SIZE;
> +               vring->align = SMP_CACHE_BYTES;
> +               vring->index = i;
> +               vring->vdev_id = tm_vdev->vdev.id.device;
> +               dev = &tm_vdev->vdev.dev;
> +
> +               size = vring_size(vring->num, vring->align);
> +               va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
> +               if (!va) {
> +                       mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
> +                       dev_err(dev->parent, "dma_alloc_coherent failed\n");
> +                       return -ENOMEM;
> +               }
> +
> +               vring->va = va;
> +               vring->dma = dma;
> +       }
> +
> +       return 0;
> +}
> +
> +/* Disable interrupts of the FIFO device. */
> +static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
> +{
> +       int i, irq;
> +
> +       for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
> +               irq = fifo->irq_info[i].irq;
> +               fifo->irq_info[i].irq = 0;
> +               disable_irq(irq);
> +       }
> +}
> +
> +/* Interrupt handler. */
> +static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
> +{
> +       struct mlxbf_tmfifo_irq_info *irq_info = arg;
> +
> +       if (!test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
> +               schedule_work(&irq_info->fifo->work);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/* Get the next packet descriptor from the vring. */
> +static struct vring_desc *
> +mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
> +{
> +       const struct vring *vr = virtqueue_get_vring(vring->vq);
> +       struct virtio_device *vdev = vring->vq->vdev;
> +       unsigned int idx, head;
> +
> +       if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
> +               return NULL;
> +
> +       idx = vring->next_avail % vr->num;
> +       head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
> +       if (WARN_ON(head >= vr->num))
> +               return NULL;
> +
> +       vring->next_avail++;
> +
> +       return &vr->desc[head];
> +}
> +
> +/* Release virtio descriptor. */
> +static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
> +                                     struct vring_desc *desc, u32 len)
> +{
> +       const struct vring *vr = virtqueue_get_vring(vring->vq);
> +       struct virtio_device *vdev = vring->vq->vdev;
> +       u16 idx, vr_idx;
> +
> +       vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
> +       idx = vr_idx % vr->num;
> +       vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
> +       vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
> +
> +       /*
> +        * Virtio could poll and check the 'idx' to decide whether the desc is
> +        * done or not. Add a memory barrier here to make sure the update above
> +        * completes before updating the idx.
> +        */
> +       mb();
> +       vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
> +}
> +
> +/* Get the total length of the descriptor chain. */
> +static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
> +                                   struct vring_desc *desc)
> +{
> +       const struct vring *vr = virtqueue_get_vring(vring->vq);
> +       struct virtio_device *vdev = vring->vq->vdev;
> +       u32 len = 0, idx;
> +
> +       while (desc) {
> +               len += virtio32_to_cpu(vdev, desc->len);
> +               if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
> +                       break;
> +               idx = virtio16_to_cpu(vdev, desc->next);
> +               desc = &vr->desc[idx];
> +       }
> +
> +       return len;
> +}
> +
> +static void mlxbf_tmfifo_release_pending_pkt(struct mlxbf_tmfifo_vring *vring)
> +{
> +       struct vring_desc *desc_head;
> +       u32 len = 0;
> +
> +       if (vring->desc_head) {
> +               desc_head = vring->desc_head;
> +               len = vring->pkt_len;
> +       } else {
> +               desc_head = mlxbf_tmfifo_get_next_desc(vring);
> +               len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
> +       }
> +
> +       if (desc_head)
> +               mlxbf_tmfifo_release_desc(vring, desc_head, len);
> +
> +       vring->pkt_len = 0;
> +       vring->desc = NULL;
> +       vring->desc_head = NULL;
> +}
> +
> +static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
> +                                      struct vring_desc *desc, bool is_rx)
> +{
> +       struct virtio_device *vdev = vring->vq->vdev;
> +       struct virtio_net_hdr *net_hdr;
> +
> +       net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +       memset(net_hdr, 0, sizeof(*net_hdr));
> +}
> +
> +/* Get and initialize the next packet. */
> +static struct vring_desc *
> +mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> +{
> +       struct vring_desc *desc;
> +
> +       desc = mlxbf_tmfifo_get_next_desc(vring);
> +       if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
> +               mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
> +
> +       vring->desc_head = desc;
> +       vring->desc = desc;
> +
> +       return desc;
> +}
> +
> +/* House-keeping timer. */
> +static void mlxbf_tmfifo_timer(struct timer_list *t)
> +{
> +       struct mlxbf_tmfifo *fifo = container_of(t, struct mlxbf_tmfifo, timer);
> +       int rx, tx;
> +
> +       rx = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
> +       tx = !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
> +
> +       if (rx || tx)
> +               schedule_work(&fifo->work);
> +
> +       mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
> +}
> +
> +/* Copy one console packet into the output buffer. */
> +static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
> +                                           struct mlxbf_tmfifo_vring *vring,
> +                                           struct vring_desc *desc)
> +{
> +       const struct vring *vr = virtqueue_get_vring(vring->vq);
> +       struct virtio_device *vdev = &cons->vdev;
> +       u32 len, idx, seg;
> +       void *addr;
> +
> +       while (desc) {
> +               addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +               len = virtio32_to_cpu(vdev, desc->len);
> +
> +               seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
> +                                       MLXBF_TMFIFO_CON_TX_BUF_SIZE);
> +               if (len <= seg) {
> +                       memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
> +               } else {
> +                       memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
> +                       addr += seg;
> +                       memcpy(cons->tx_buf.buf, addr, len - seg);
> +               }
> +               cons->tx_buf.head = (cons->tx_buf.head + len) %
> +                       MLXBF_TMFIFO_CON_TX_BUF_SIZE;
> +
> +               if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
> +                       break;
> +               idx = virtio16_to_cpu(vdev, desc->next);
> +               desc = &vr->desc[idx];
> +       }
> +}
> +
> +/* Copy console data into the output buffer. */
> +static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
> +                                       struct mlxbf_tmfifo_vring *vring)
> +{
> +       struct vring_desc *desc;
> +       u32 len, avail;
> +
> +       desc = mlxbf_tmfifo_get_next_desc(vring);
> +       while (desc) {
> +               /* Release the packet if not enough space. */
> +               len = mlxbf_tmfifo_get_pkt_len(vring, desc);
> +               avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
> +                                  MLXBF_TMFIFO_CON_TX_BUF_SIZE);
> +               if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
> +                       mlxbf_tmfifo_release_desc(vring, desc, len);
> +                       break;
> +               }
> +
> +               mlxbf_tmfifo_console_output_one(cons, vring, desc);
> +               mlxbf_tmfifo_release_desc(vring, desc, len);
> +               desc = mlxbf_tmfifo_get_next_desc(vring);
> +       }
> +}
> +
> +/* Get the number of available words in Rx FIFO for receiving. */
> +static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
> +{
> +       u64 sts;
> +
> +       sts = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
> +       return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);
> +}
> +
> +/* Get the number of available words in the TmFifo for sending. */
> +static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
> +{
> +       int tx_reserve;
> +       u32 count;
> +       u64 sts;
> +
> +       /* Reserve some room in FIFO for console messages. */
> +       if (vdev_id == VIRTIO_ID_NET)
> +               tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
> +       else
> +               tx_reserve = 1;
> +
> +       sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
> +       count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
> +       return fifo->tx_fifo_size - tx_reserve - count;
> +}
> +
> +/* Console Tx (move data from the output buffer into the TmFifo). */
> +static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
> +{
> +       struct mlxbf_tmfifo_msg_hdr hdr;
> +       struct mlxbf_tmfifo_vdev *cons;
> +       unsigned long flags;
> +       int size, seg;
> +       void *addr;
> +       u64 data;
> +
> +       /* Return if not enough space available. */
> +       if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
> +               return;
> +
> +       cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +       if (!cons || !cons->tx_buf.buf)
> +               return;
> +
> +       /* Return if no data to send. */
> +       size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
> +                       MLXBF_TMFIFO_CON_TX_BUF_SIZE);
> +       if (size == 0)
> +               return;
> +
> +       /* Adjust the size to available space. */
> +       if (size + sizeof(hdr) > avail * sizeof(u64))
> +               size = avail * sizeof(u64) - sizeof(hdr);
> +
> +       /* Write header. */
> +       hdr.type = VIRTIO_ID_CONSOLE;
> +       hdr.len = htons(size);
> +       writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +       /* Use spin-lock to protect the 'cons->tx_buf'. */
> +       spin_lock_irqsave(&fifo->spin_lock, flags);
> +
> +       while (size > 0) {
> +               addr = cons->tx_buf.buf + cons->tx_buf.tail;
> +
> +               seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
> +                                     MLXBF_TMFIFO_CON_TX_BUF_SIZE);
> +               if (seg >= sizeof(u64)) {
> +                       memcpy(&data, addr, sizeof(u64));
> +               } else {
> +                       memcpy(&data, addr, seg);
> +                       memcpy((u8 *)&data + seg, cons->tx_buf.buf,
> +                              sizeof(u64) - seg);
> +               }
> +               writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +
> +               if (size >= sizeof(u64)) {
> +                       cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
> +                               MLXBF_TMFIFO_CON_TX_BUF_SIZE;
> +                       size -= sizeof(u64);
> +               } else {
> +                       cons->tx_buf.tail = (cons->tx_buf.tail + size) %
> +                               MLXBF_TMFIFO_CON_TX_BUF_SIZE;
> +                       size = 0;
> +               }
> +       }
> +
> +       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +}
> +
> +/* Rx/Tx one word in the descriptor buffer. */
> +static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
> +                                  struct vring_desc *desc,
> +                                  bool is_rx, int len)
> +{
> +       struct virtio_device *vdev = vring->vq->vdev;
> +       struct mlxbf_tmfifo *fifo = vring->fifo;
> +       void *addr;
> +       u64 data;
> +
> +       /* Get the buffer address of this desc. */
> +       addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
> +
> +       /* Read a word from FIFO for Rx. */
> +       if (is_rx)
> +               data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
> +
> +       if (vring->cur_len + sizeof(u64) <= len) {
> +               /* The whole word. */
> +               if (is_rx)
> +                       memcpy(addr + vring->cur_len, &data, sizeof(u64));
> +               else
> +                       memcpy(&data, addr + vring->cur_len, sizeof(u64));
> +               vring->cur_len += sizeof(u64);
> +       } else {
> +               /* Leftover bytes. */
> +               if (is_rx)
> +                       memcpy(addr + vring->cur_len, &data,
> +                              len - vring->cur_len);
> +               else
> +                       memcpy(&data, addr + vring->cur_len,
> +                              len - vring->cur_len);
> +               vring->cur_len = len;
> +       }
> +
> +       /* Write the word into FIFO for Tx. */
> +       if (!is_rx)
> +               writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +}
> +
> +/*
> + * Rx/Tx packet header.
> + *
> + * In Rx case, the packet might be found to belong to a different vring since
> + * the TmFifo is shared by different services. In such case, the 'vring_change'
> + * flag is set.
> + */
> +static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
> +                                    struct vring_desc *desc,
> +                                    bool is_rx, bool *vring_change)
> +{
> +       struct mlxbf_tmfifo *fifo = vring->fifo;
> +       struct virtio_net_config *config;
> +       struct mlxbf_tmfifo_msg_hdr hdr;
> +       int vdev_id, hdr_len;
> +
> +       /* Read/Write packet header. */
> +       if (is_rx) {
> +               /* Drain one word from the FIFO. */
> +               *(u64 *)&hdr = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
> +
> +               /* Skip the length 0 packets (keepalive). */
> +               if (hdr.len == 0)
> +                       return;
> +
> +               /* Check packet type. */
> +               if (hdr.type == VIRTIO_ID_NET) {
> +                       vdev_id = VIRTIO_ID_NET;
> +                       hdr_len = sizeof(struct virtio_net_hdr);
> +                       config = &fifo->vdev[vdev_id]->config.net;
> +                       if (ntohs(hdr.len) > config->mtu +
> +                           MLXBF_TMFIFO_NET_L2_OVERHEAD)
> +                               return;
> +               } else {
> +                       vdev_id = VIRTIO_ID_CONSOLE;
> +                       hdr_len = 0;
> +               }
> +
> +               /*
> +                * Check whether the new packet still belongs to this vring.
> +                * If not, update the pkt_len of the new vring.
> +                */
> +               if (vdev_id != vring->vdev_id) {
> +                       struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
> +
> +                       if (!tm_dev2)
> +                               return;
> +                       vring->desc = desc;
> +                       vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
> +                       *vring_change = true;
> +               }
> +               vring->pkt_len = ntohs(hdr.len) + hdr_len;
> +       } else {
> +               /* Network virtio has an extra header. */
> +               hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
> +                          sizeof(struct virtio_net_hdr) : 0;
> +               vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, desc);
> +               hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
> +                           VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
> +               hdr.len = htons(vring->pkt_len - hdr_len);
> +               writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
> +       }
> +
> +       vring->cur_len = hdr_len;
> +       vring->rem_len = vring->pkt_len;
> +       fifo->vring[is_rx] = vring;
> +}
> +
> +/*
> + * Rx/Tx one descriptor.
> + *
> + * Return true to indicate more data available.
> + */
> +static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
> +                                      bool is_rx, int *avail)
> +{
> +       const struct vring *vr = virtqueue_get_vring(vring->vq);
> +       struct mlxbf_tmfifo *fifo = vring->fifo;
> +       struct virtio_device *vdev;
> +       bool vring_change = false;
> +       struct vring_desc *desc;
> +       unsigned long flags;
> +       u32 len, idx;
> +
> +       vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +
> +       /* Get the descriptor of the next packet. */
> +       if (!vring->desc) {
> +               desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
> +               if (!desc)
> +                       return false;
> +       } else {
> +               desc = vring->desc;
> +       }
> +
> +       /* Beginning of a packet. Start to Rx/Tx packet header. */
> +       if (vring->pkt_len == 0) {
> +               mlxbf_tmfifo_rxtx_header(vring, desc, is_rx, &vring_change);
> +               (*avail)--;
> +
> +               /* Return if new packet is for another ring. */
> +               if (vring_change)
> +                       return false;
> +               goto mlxbf_tmfifo_desc_done;
> +       }
> +
> +       /* Get the length of this desc. */
> +       len = virtio32_to_cpu(vdev, desc->len);
> +       if (len > vring->rem_len)
> +               len = vring->rem_len;
> +
> +       /* Rx/Tx one word (8 bytes) if not done. */
> +       if (vring->cur_len < len) {
> +               mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
> +               (*avail)--;
> +       }
> +
> +       /* Check again whether it's done. */
> +       if (vring->cur_len == len) {
> +               vring->cur_len = 0;
> +               vring->rem_len -= len;
> +
> +               /* Get the next desc on the chain. */
> +               if (vring->rem_len > 0 &&
> +                   (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
> +                       idx = virtio16_to_cpu(vdev, desc->next);
> +                       desc = &vr->desc[idx];
> +                       goto mlxbf_tmfifo_desc_done;
> +               }
> +
> +               /* Done and release the pending packet. */
> +               mlxbf_tmfifo_release_pending_pkt(vring);
> +               desc = NULL;
> +               fifo->vring[is_rx] = NULL;
> +
> +               /* Notify upper layer that packet is done. */
> +               spin_lock_irqsave(&fifo->spin_lock, flags);
> +               vring_interrupt(0, vring->vq);
> +               spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +       }
> +
> +mlxbf_tmfifo_desc_done:
> +       /* Save the current desc. */
> +       vring->desc = desc;
> +
> +       return true;
> +}
> +
> +/* Rx & Tx processing of a queue. */
> +static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
> +{
> +       int avail = 0, devid = vring->vdev_id;
> +       struct mlxbf_tmfifo *fifo;
> +       bool more;
> +
> +       fifo = vring->fifo;
> +
> +       /* Return if vdev is not ready. */
> +       if (!fifo->vdev[devid])
> +               return;
> +
> +       /* Return if another vring is running. */
> +       if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
> +               return;
> +
> +       /* Only handle console and network for now. */
> +       if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
> +               return;
> +
> +       do {
> +               /* Get available FIFO space. */
> +               if (avail == 0) {
> +                       if (is_rx)
> +                               avail = mlxbf_tmfifo_get_rx_avail(fifo);
> +                       else
> +                               avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
> +                       if (avail <= 0)
> +                               break;
> +               }
> +
> +               /* Console output always comes from the Tx buffer. */
> +               if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
> +                       mlxbf_tmfifo_console_tx(fifo, avail);
> +                       break;
> +               }
> +
> +               /* Handle one descriptor. */
> +               more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
> +       } while (more);
> +}
> +
> +/* Handle Rx or Tx queues. */
> +static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
> +                                  int irq_id, bool is_rx)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +       struct mlxbf_tmfifo_vring *vring;
> +       int i;
> +
> +       if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
> +           !fifo->irq_info[irq_id].irq)
> +               return;
> +
> +       for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
> +               tm_vdev = fifo->vdev[i];
> +               if (tm_vdev) {
> +                       vring = &tm_vdev->vrings[queue_id];
> +                       if (vring->vq)
> +                               mlxbf_tmfifo_rxtx(vring, is_rx);
> +               }
> +       }
> +}
> +
> +/* Work handler for Rx and Tx case. */
> +static void mlxbf_tmfifo_work_handler(struct work_struct *work)
> +{
> +       struct mlxbf_tmfifo *fifo;
> +
> +       fifo = container_of(work, struct mlxbf_tmfifo, work);
> +       if (!fifo->is_ready)
> +               return;
> +
> +       mutex_lock(&fifo->lock);
> +
> +       /* Tx (Send data to the TmFifo). */
> +       mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
> +                              MLXBF_TM_TX_LWM_IRQ, false);
> +
> +       /* Rx (Receive data from the TmFifo). */
> +       mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
> +                              MLXBF_TM_RX_HWM_IRQ, true);
> +
> +       mutex_unlock(&fifo->lock);
> +}
> +
> +/* The notify function is called when new buffers are posted. */
> +static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
> +{
> +       struct mlxbf_tmfifo_vring *vring = vq->priv;
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +       struct mlxbf_tmfifo *fifo;
> +       unsigned long flags;
> +
> +       fifo = vring->fifo;
> +
> +       /*
> +        * Virtio maintains vrings in pairs, even number ring for Rx
> +        * and odd number ring for Tx.
> +        */
> +       if (vring->index & BIT(0)) {
> +               /*
> +                * Console could make blocking call with interrupts disabled.
> +                * In such case, the vring needs to be served right away. For
> +                * other cases, just set the TX LWM bit to start Tx in the
> +                * worker handler.
> +                */
> +               if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
> +                       spin_lock_irqsave(&fifo->spin_lock, flags);
> +                       tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
> +                       mlxbf_tmfifo_console_output(tm_vdev, vring);
> +                       spin_unlock_irqrestore(&fifo->spin_lock, flags);
> +               } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
> +                                           &fifo->pend_events)) {
> +                       return true;
> +               }
> +       } else {
> +               if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
> +                       return true;
> +       }
> +
> +       schedule_work(&fifo->work);
> +
> +       return true;
> +}
> +
> +/* Get the array of feature bits for this device. */
> +static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       return tm_vdev->features;
> +}
> +
> +/* Confirm device features to use. */
> +static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       tm_vdev->features = vdev->features;
> +
> +       return 0;
> +}
> +
> +/* Free virtqueues found by find_vqs(). */
> +static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +       struct mlxbf_tmfifo_vring *vring;
> +       struct virtqueue *vq;
> +       int i;
> +
> +       for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
> +               vring = &tm_vdev->vrings[i];
> +
> +               /* Release the pending packet. */
> +               if (vring->desc)
> +                       mlxbf_tmfifo_release_pending_pkt(vring);
> +               vq = vring->vq;
> +               if (vq) {
> +                       vring->vq = NULL;
> +                       vring_del_virtqueue(vq);
> +               }
> +       }
> +}
> +
> +/* Create and initialize the virtual queues. */
> +static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
> +                                       unsigned int nvqs,
> +                                       struct virtqueue *vqs[],
> +                                       vq_callback_t *callbacks[],
> +                                       const char * const names[],
> +                                       const bool *ctx,
> +                                       struct irq_affinity *desc)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +       struct mlxbf_tmfifo_vring *vring;
> +       struct virtqueue *vq;
> +       int i, ret, size;
> +
> +       if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
> +               return -EINVAL;
> +
> +       for (i = 0; i < nvqs; ++i) {
> +               if (!names[i]) {
> +                       ret = -EINVAL;
> +                       goto error;
> +               }
> +               vring = &tm_vdev->vrings[i];
> +
> +               /* zero vring */
> +               size = vring_size(vring->num, vring->align);
> +               memset(vring->va, 0, size);
> +               vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
> +                                        false, false, vring->va,
> +                                        mlxbf_tmfifo_virtio_notify,
> +                                        callbacks[i], names[i]);
> +               if (!vq) {
> +                       dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
> +                       ret = -ENOMEM;
> +                       goto error;
> +               }
> +
> +               vqs[i] = vq;
> +               vring->vq = vq;
> +               vq->priv = vring;
> +       }
> +
> +       return 0;
> +
> +error:
> +       mlxbf_tmfifo_virtio_del_vqs(vdev);
> +       return ret;
> +}
> +
> +/* Read the status byte. */
> +static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       return tm_vdev->status;
> +}
> +
> +/* Write the status byte. */
> +static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
> +                                          u8 status)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       tm_vdev->status = status;
> +}
> +
> +/* Reset the device. Not much here for now. */
> +static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       tm_vdev->status = 0;
> +}
> +
> +/* Read the value of a configuration field. */
> +static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
> +                                   unsigned int offset,
> +                                   void *buf,
> +                                   unsigned int len)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       if ((u64)offset + len > sizeof(tm_vdev->config))
> +               return;
> +
> +       memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
> +}
> +
> +/* Write the value of a configuration field. */
> +static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
> +                                   unsigned int offset,
> +                                   const void *buf,
> +                                   unsigned int len)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       if ((u64)offset + len > sizeof(tm_vdev->config))
> +               return;
> +
> +       memcpy((u8 *)&tm_vdev->config + offset, buf, len);
> +}
> +
> +static void tmfifo_virtio_dev_release(struct device *device)
> +{
> +       struct virtio_device *vdev =
> +                       container_of(device, struct virtio_device, dev);
> +       struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
> +
> +       kfree(tm_vdev);
> +}
> +
> +/* Virtio config operations. */
> +static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
> +       .get_features = mlxbf_tmfifo_virtio_get_features,
> +       .finalize_features = mlxbf_tmfifo_virtio_finalize_features,
> +       .find_vqs = mlxbf_tmfifo_virtio_find_vqs,
> +       .del_vqs = mlxbf_tmfifo_virtio_del_vqs,
> +       .reset = mlxbf_tmfifo_virtio_reset,
> +       .set_status = mlxbf_tmfifo_virtio_set_status,
> +       .get_status = mlxbf_tmfifo_virtio_get_status,
> +       .get = mlxbf_tmfifo_virtio_get,
> +       .set = mlxbf_tmfifo_virtio_set,
> +};
> +
> +/* Create vdev for the FIFO. */
> +static int mlxbf_tmfifo_create_vdev(struct device *dev,
> +                                   struct mlxbf_tmfifo *fifo,
> +                                   int vdev_id, u64 features,
> +                                   void *config, u32 size)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
> +       int ret;
> +
> +       mutex_lock(&fifo->lock);
> +
> +       tm_vdev = fifo->vdev[vdev_id];
> +       if (tm_vdev) {
> +               dev_err(dev, "vdev %d already exists\n", vdev_id);
> +               ret = -EEXIST;
> +               goto fail;
> +       }
> +
> +       tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
> +       if (!tm_vdev) {
> +               ret = -ENOMEM;
> +               goto fail;
> +       }
> +
> +       tm_vdev->vdev.id.device = vdev_id;
> +       tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
> +       tm_vdev->vdev.dev.parent = dev;
> +       tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
> +       tm_vdev->features = features;
> +       if (config)
> +               memcpy(&tm_vdev->config, config, size);
> +
> +       if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
> +               dev_err(dev, "unable to allocate vring\n");
> +               ret = -ENOMEM;
> +               goto vdev_fail;
> +       }
> +
> +       /* Allocate an output buffer for the console device. */
> +       if (vdev_id == VIRTIO_ID_CONSOLE)
> +               tm_vdev->tx_buf.buf = devm_kmalloc(dev,
> +                                                  MLXBF_TMFIFO_CON_TX_BUF_SIZE,
> +                                                  GFP_KERNEL);
> +       fifo->vdev[vdev_id] = tm_vdev;
> +
> +       /* Register the virtio device. */
> +       ret = register_virtio_device(&tm_vdev->vdev);
> +       reg_dev = tm_vdev;
> +       if (ret) {
> +               dev_err(dev, "register_virtio_device failed\n");
> +               goto vdev_fail;
> +       }
> +
> +       mutex_unlock(&fifo->lock);
> +       return 0;
> +
> +vdev_fail:
> +       mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
> +       fifo->vdev[vdev_id] = NULL;
> +       if (reg_dev)
> +               put_device(&tm_vdev->vdev.dev);
> +       else
> +               kfree(tm_vdev);
> +fail:
> +       mutex_unlock(&fifo->lock);
> +       return ret;
> +}
> +
> +/* Delete vdev for the FIFO. */
> +static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
> +{
> +       struct mlxbf_tmfifo_vdev *tm_vdev;
> +
> +       mutex_lock(&fifo->lock);
> +
> +       /* Unregister vdev. */
> +       tm_vdev = fifo->vdev[vdev_id];
> +       if (tm_vdev) {
> +               unregister_virtio_device(&tm_vdev->vdev);
> +               mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
> +               fifo->vdev[vdev_id] = NULL;
> +       }
> +
> +       mutex_unlock(&fifo->lock);
> +
> +       return 0;
> +}
> +
> +/* Read the configured network MAC address from efi variable. */
> +static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
> +{
> +       efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
> +       unsigned long size = ETH_ALEN;
> +       u8 buf[ETH_ALEN];
> +       efi_status_t rc;
> +
> +       rc = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size, buf);
> +       if (rc == EFI_SUCCESS && size == ETH_ALEN)
> +               ether_addr_copy(mac, buf);
> +       else
> +               ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
> +}
> +
> +/* Set TmFifo thresholds which are used to trigger interrupts. */
> +static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
> +{
> +       u64 ctl;
> +
> +       /* Get Tx FIFO size and set the low/high watermark. */
> +       ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +       fifo->tx_fifo_size =
> +               FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
> +       ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
> +               FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
> +                          fifo->tx_fifo_size / 2);
> +       ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
> +               FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
> +                          fifo->tx_fifo_size - 1);
> +       writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
> +
> +       /* Get Rx FIFO size and set the low/high watermark. */
> +       ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +       fifo->rx_fifo_size =
> +               FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
> +       ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
> +               FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
> +       ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
> +               FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
> +       writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
> +}
> +
> +static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
> +{
> +       int i;
> +
> +       fifo->is_ready = false;
> +       del_timer_sync(&fifo->timer);
> +       mlxbf_tmfifo_disable_irqs(fifo);
> +       cancel_work_sync(&fifo->work);
> +       for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
> +               mlxbf_tmfifo_delete_vdev(fifo, i);
> +}
> +
> +/* Probe the TMFIFO. */
> +static int mlxbf_tmfifo_probe(struct platform_device *pdev)
> +{
> +       struct virtio_net_config net_config;
> +       struct device *dev = &pdev->dev;
> +       struct mlxbf_tmfifo *fifo;
> +       int i, rc;
> +
> +       fifo = devm_kzalloc(dev, sizeof(*fifo), GFP_KERNEL);
> +       if (!fifo)
> +               return -ENOMEM;
> +
> +       spin_lock_init(&fifo->spin_lock);
> +       INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
> +       mutex_init(&fifo->lock);
> +
> +       /* Get the resource of the Rx FIFO. */
> +       fifo->rx_base = devm_platform_ioremap_resource(pdev, 0);
> +       if (IS_ERR(fifo->rx_base))
> +               return PTR_ERR(fifo->rx_base);
> +
> +       /* Get the resource of the Tx FIFO. */
> +       fifo->tx_base = devm_platform_ioremap_resource(pdev, 1);
> +       if (IS_ERR(fifo->tx_base))
> +               return PTR_ERR(fifo->tx_base);
> +
> +       platform_set_drvdata(pdev, fifo);
> +
> +       timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
> +
> +       for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
> +               fifo->irq_info[i].index = i;
> +               fifo->irq_info[i].fifo = fifo;
> +               fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +               rc = devm_request_irq(dev, fifo->irq_info[i].irq,
> +                                     mlxbf_tmfifo_irq_handler, 0,
> +                                     "tmfifo", &fifo->irq_info[i]);
> +               if (rc) {
> +                       dev_err(dev, "devm_request_irq failed\n");
> +                       fifo->irq_info[i].irq = 0;
> +                       return rc;
> +               }
> +       }
> +
> +       mlxbf_tmfifo_set_threshold(fifo);
> +
> +       /* Create the console vdev. */
> +       rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +       if (rc)
> +               goto fail;
> +
> +       /* Create the network vdev. */
> +       memset(&net_config, 0, sizeof(net_config));
> +       net_config.mtu = ETH_DATA_LEN;
> +       net_config.status = VIRTIO_NET_S_LINK_UP;
> +       mlxbf_tmfifo_get_cfg_mac(net_config.mac);
> +       rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET,
> +                                     MLXBF_TMFIFO_NET_FEATURES, &net_config,
> +                                     sizeof(net_config));
> +       if (rc)
> +               goto fail;
> +
> +       mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
> +
> +       fifo->is_ready = true;
> +       return 0;
> +
> +fail:
> +       mlxbf_tmfifo_cleanup(fifo);
> +       return rc;
> +}
> +
> +/* Device remove function. */
> +static int mlxbf_tmfifo_remove(struct platform_device *pdev)
> +{
> +       struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
> +
> +       mlxbf_tmfifo_cleanup(fifo);
> +
> +       return 0;
> +}
> +
> +static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
> +       { "MLNXBF01", 0 },
> +       {}
> +};
> +MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
> +
> +static struct platform_driver mlxbf_tmfifo_driver = {
> +       .probe = mlxbf_tmfifo_probe,
> +       .remove = mlxbf_tmfifo_remove,
> +       .driver = {
> +               .name = "bf-tmfifo",
> +               .acpi_match_table = mlxbf_tmfifo_acpi_match,
> +       },
> +};
> +
> +module_platform_driver(mlxbf_tmfifo_driver);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Mellanox Technologies");
> --
> 1.8.3.1
>


-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 179+ messages in thread

end of thread, other threads:[~2019-05-06  9:13 UTC | newest]

Thread overview: 179+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-05-25 16:06 ` Liming Sun
2018-05-25 16:06 ` [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-05-25 16:06   ` Liming Sun
2018-05-25 16:06 ` [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-05-25 16:06   ` Liming Sun
2018-05-25 16:06 ` [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-05-25 16:06   ` Liming Sun
2018-05-25 17:14 ` [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Robin Murphy
2018-05-25 17:14   ` Robin Murphy
2018-05-25 20:18   ` Liming Sun
2018-05-25 20:18     ` Liming Sun
2018-05-25 20:17 ` [PATCH v2 " Liming Sun
2018-05-25 20:17   ` Liming Sun
2018-05-25 20:17 ` [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-05-25 20:17   ` Liming Sun
2018-05-25 20:17 ` [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-05-25 20:17   ` Liming Sun
2018-05-31  3:43   ` Rob Herring
2018-05-31  3:43     ` Rob Herring
2018-06-01 14:31     ` Liming Sun
2018-06-01 14:31       ` Liming Sun
2018-05-25 20:17 ` [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-05-25 20:17   ` Liming Sun
2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-06-01 14:31   ` Liming Sun
2018-06-01 14:31 ` [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-06-01 14:31   ` Liming Sun
2018-06-01 14:31 ` [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-06-01 14:31   ` Liming Sun
2018-06-11 18:19   ` Rob Herring
2018-06-11 18:19     ` Rob Herring
2018-06-01 14:31 ` [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-06-01 14:31   ` Liming Sun
2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-10-24 17:55   ` Liming Sun
2018-10-25 15:57   ` Arnd Bergmann
2018-10-25 15:57     ` Arnd Bergmann
2018-10-26 18:24     ` Liming Sun
2018-10-26 18:24       ` Liming Sun
2018-10-26 18:35       ` Arnd Bergmann
2018-10-26 18:35         ` Arnd Bergmann
2018-10-29 14:17         ` Liming Sun
2018-10-29 14:17           ` Liming Sun
2018-10-29 14:52           ` Arnd Bergmann
2018-10-29 14:52             ` Arnd Bergmann
2018-12-04 22:12     ` Liming Sun
2018-12-04 22:12       ` Liming Sun
2018-10-24 17:55 ` [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-10-24 17:55   ` Liming Sun
2018-10-25 15:38   ` Arnd Bergmann
2018-10-25 15:38     ` Arnd Bergmann
2018-10-26 19:18     ` Liming Sun
2018-10-26 19:18       ` Liming Sun
2018-10-26 19:32       ` Arnd Bergmann
2018-10-26 19:32         ` Arnd Bergmann
2018-10-29 14:58         ` Liming Sun
2018-10-29 14:58           ` Liming Sun
2018-10-29 15:26           ` Arnd Bergmann
2018-10-29 15:26             ` Arnd Bergmann
2018-10-29 16:09             ` Liming Sun
2018-10-29 16:09               ` Liming Sun
2018-10-24 17:55 ` [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-10-24 17:55   ` Liming Sun
2018-10-25 15:32   ` Arnd Bergmann
2018-10-25 15:32     ` Arnd Bergmann
2018-10-26 19:36     ` Liming Sun
2018-10-26 19:36       ` Liming Sun
2018-10-26 20:33       ` Arnd Bergmann
2018-10-26 20:33         ` Arnd Bergmann
2018-10-29 16:48         ` Liming Sun
2018-10-29 16:48           ` Liming Sun
2019-01-24 15:07         ` Liming Sun
2019-01-24 15:07           ` Liming Sun
2018-10-24 17:55 ` [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-10-24 17:55   ` Liming Sun
2018-10-31 18:09 ` [PATCH v5 1/5] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-10-31 18:09   ` Liming Sun
2018-10-31 18:09 ` [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-10-31 18:09   ` Liming Sun
2018-10-31 18:09 ` [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-10-31 18:09   ` Liming Sun
2018-10-31 18:09 ` [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-10-31 18:09   ` Liming Sun
2018-10-31 18:09 ` [PATCH v5 5/5] soc: mellanox: Add host side drivers to support Mellanox BlueField SoCs Liming Sun
2018-11-01 16:23 ` [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-11-01 16:23   ` Liming Sun
2018-12-12 23:07   ` Matthias Brugger
2018-12-12 23:07     ` Matthias Brugger
2019-01-03 19:20     ` Liming Sun
2019-01-03 19:20       ` Liming Sun
2018-11-01 16:25 ` Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
2018-11-01 16:25   ` Liming Sun
2019-01-18 16:02   ` Arnd Bergmann
2019-01-18 16:02     ` Arnd Bergmann
2019-01-18 16:02     ` Arnd Bergmann
2019-01-21 19:22     ` Liming Sun
2019-01-21 19:22       ` Liming Sun
2019-01-21 19:22       ` Liming Sun
2019-01-22 12:20     ` Vincent Whitchurch
2019-01-22 12:20       ` Vincent Whitchurch
2019-01-22 12:20       ` Vincent Whitchurch
2019-01-22 13:27       ` Liming Sun
2019-01-22 13:36         ` Liming Sun
2019-01-22 13:36           ` Liming Sun
2019-01-22 13:36           ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
2018-11-01 16:25   ` Liming Sun
2018-11-01 16:25 ` [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
2018-11-01 16:25   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-03-15 13:18   ` Matthias Brugger
2019-03-15 13:18     ` Matthias Brugger
2019-01-03 19:17 ` [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-03 19:17 ` [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
2019-01-03 19:17   ` Liming Sun
2019-01-21 19:17 ` [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver Liming Sun
2019-01-21 19:17   ` Liming Sun
2019-02-18 13:24   ` Arnd Bergmann
2019-02-18 13:24     ` Arnd Bergmann
2019-01-28 17:28 ` [PATCH v8 0/2] TmFifo platform driver for Mellanox BlueField SoC Liming Sun
2019-01-28 17:28 ` [PATCH v8 1/2] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2019-01-29 22:06   ` Andy Shevchenko
2019-02-13 13:34     ` Liming Sun
2019-02-13 16:33     ` Liming Sun
2019-01-30  6:24   ` Vadim Pasternak
2019-01-30  6:24     ` Vadim Pasternak
2019-02-13 13:42     ` Liming Sun
2019-01-28 17:28 ` [PATCH v8 2/2] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2019-02-13 13:27 ` [PATCH v9] platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2019-02-13 18:11   ` Andy Shevchenko
2019-02-13 18:34     ` Liming Sun
2019-02-14 16:25     ` Liming Sun
2019-02-28 15:51     ` Liming Sun
2019-02-28 15:51 ` [PATCH v10] " Liming Sun
2019-03-05 15:34   ` Andy Shevchenko
2019-03-06 20:00     ` Liming Sun
2019-03-08 14:44       ` Liming Sun
2019-03-08 14:41 ` [PATCH v11] " Liming Sun
2019-03-26 21:13 ` Liming Sun
2019-03-28 19:56 ` [PATCH v12] " Liming Sun
2019-04-04 19:36 ` [PATCH v13] " Liming Sun
2019-04-05 15:44   ` Andy Shevchenko
2019-04-05 19:10     ` Liming Sun
2019-04-07  2:05       ` Liming Sun
2019-04-11 14:13         ` Andy Shevchenko
2019-04-12 16:15           ` Liming Sun
2019-04-07  2:03 ` [PATCH v14] " Liming Sun
2019-04-11 14:09   ` Andy Shevchenko
2019-04-12 14:23     ` Liming Sun
2019-04-12 17:30 ` [PATCH v15] " Liming Sun
2019-05-03 13:49 ` [PATCH v16] " Liming Sun
2019-05-06  9:13   ` Andy Shevchenko

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.