linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
@ 2018-05-25 16:06 Liming Sun
  2018-05-25 16:06 ` [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
                   ` (19 more replies)
  0 siblings, 20 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1265 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |  112 ++++
 6 files changed, 1402 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..a3303d1
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1265 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Several utility macros to get/set the register fields. */
+#define TMFIFO_GET_FIELD(reg, field) \
+	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
+
+#define TMFIFO_SET_FIELD(reg, field, value) ({ \
+	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
+	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
+})
+
+#define TMFIFO_RX_GET_STS_CNT(sts) \
+	TMFIFO_GET_FIELD(sts, TMFIFO_RX_STS__COUNT)
+
+#define TMFIFO_TX_GET_STS_CNT(sts) \
+	TMFIFO_GET_FIELD(sts, TMFIFO_TX_STS__COUNT)
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_RX_GET_STS_CNT(sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_TX_GET_STS_CNT(sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM, fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_AUTHOR("Mellanox Technologies, Ltd");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.7");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..b07f353
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#ifdef __tile__
+#define _64bit(x) x ## UL
+#else /* __tile__ */
+#define _64bit(x) x ## ULL
+#endif /* __tile__ */
+#endif /* __ASSEMBLER */
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#ifndef __DOXYGEN__
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#endif /* !defined(__DOXYGEN__) */
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-05-25 16:06 ` Liming Sun
  2018-05-25 16:06 ` [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
                   ` (18 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
  2018-05-25 16:06 ` [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-05-25 16:06 ` Liming Sun
  2018-05-25 16:06 ` [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
                   ` (17 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..0a362f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,20 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed via USB or PCIe.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
  2018-05-25 16:06 ` [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
  2018-05-25 16:06 ` [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-05-25 16:06 ` Liming Sun
  2018-05-25 17:14 ` [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Robin Murphy
                   ` (16 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 16:06 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek at lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (2 preceding siblings ...)
  2018-05-25 16:06 ` [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
@ 2018-05-25 17:14 ` Robin Murphy
  2018-05-25 20:18   ` Liming Sun
  2018-05-25 20:17 ` [PATCH v2 " Liming Sun
                   ` (15 subsequent siblings)
  19 siblings, 1 reply; 70+ messages in thread
From: Robin Murphy @ 2018-05-25 17:14 UTC (permalink / raw)
  To: linux-arm-kernel

On 25/05/18 17:06, Liming Sun wrote:
[...]
> diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> new file mode 100644
> index 0000000..a3303d1
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo.c
> @@ -0,0 +1,1265 @@
> +// SPDX-License-Identifier: GPL-2.0

This tag doesn't match the included license text...

> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */

[...]
> +/* Several utility macros to get/set the register fields. */
> +#define TMFIFO_GET_FIELD(reg, field) \
> +	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
> +
> +#define TMFIFO_SET_FIELD(reg, field, value) ({ \
> +	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
> +	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
> +})

There's no need to reinvent <linux/bitfield.h>

[...]
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> +MODULE_AUTHOR("Mellanox Technologies, Ltd");
> +MODULE_LICENSE("GPL");

...and this implies yet another different license (since it indicates 
"GPLv2 or later")

> +MODULE_VERSION("0.7");
> diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
> new file mode 100644
> index 0000000..b07f353
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo_regs.h
> @@ -0,0 +1,112 @@
> +// SPDX-License-Identifier: GPL-2.0

Again, this doesn't match the included text. Also, the SPDX comment 
style is /* */ for headers.

> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */

[...]
> +#ifdef __ASSEMBLER__
> +#define _64bit(x) x
> +#else /* __ASSEMBLER__ */
> +#ifdef __tile__
> +#define _64bit(x) x ## UL
> +#else /* __tile__ */
> +#define _64bit(x) x ## ULL
> +#endif /* __tile__ */
> +#endif /* __ASSEMBLER */
> +
> +#ifdef __KERNEL__
> +#include <linux/types.h>
> +#else
> +#include <stdint.h>
> +#endif
> +
> +#ifndef __DOXYGEN__

Given that this is a private header under drivers/, is any of that lot 
necessary?

Robin.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v2 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (3 preceding siblings ...)
  2018-05-25 17:14 ` [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Robin Murphy
@ 2018-05-25 20:17 ` Liming Sun
  2018-05-25 20:17   ` [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
                     ` (2 more replies)
  2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (14 subsequent siblings)
  19 siblings, 3 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-05-25 20:17 ` [PATCH v2 " Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  2018-05-25 20:17   ` [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
  2018-05-25 20:17   ` [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 20:17 ` [PATCH v2 " Liming Sun
  2018-05-25 20:17   ` [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  2018-05-31  3:43     ` Rob Herring
  2018-05-25 20:17   ` [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..0a362f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,20 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed via USB or PCIe.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-05-25 20:17 ` [PATCH v2 " Liming Sun
  2018-05-25 20:17   ` [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
  2018-05-25 20:17   ` [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-05-25 20:17   ` Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 20:17 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek at lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 17:14 ` [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Robin Murphy
@ 2018-05-25 20:18   ` Liming Sun
  0 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-05-25 20:18 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks for the comments! Uploaded patch series v2. 
Please also see response inline.

- Liming

> -----Original Message-----
> From: Robin Murphy [mailto:robin.murphy at arm.com]
> Sent: Friday, May 25, 2018 1:15 PM
> To: Liming Sun <lsun@mellanox.com>; Olof Johansson <olof@lixom.net>;
> Arnd Bergmann <arnd@arndb.de>; David Woods <dwoods@mellanox.com>
> Cc: devicetree at vger.kernel.org; linux-arm-kernel at lists.infradead.org
> Subject: Re: [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
> 
> On 25/05/18 17:06, Liming Sun wrote:
> [...]
> > diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> > new file mode 100644
> > index 0000000..a3303d1
> > --- /dev/null
> > +++ b/drivers/soc/mellanox/tmfifo.c
> > @@ -0,0 +1,1265 @@
> > +// SPDX-License-Identifier: GPL-2.0
> 
> This tag doesn't match the included license text...

Fixed in patch v2-1/4.

> 
> > +/*
> > + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> > + *
> > + * This software is available to you under a choice of one of two
> > + * licenses.  You may choose to be licensed under the terms of the GNU
> > + * General Public License (GPL) Version 2, available from the file
> > + * COPYING in the main directory of this source tree, or the
> > + * OpenIB.org BSD license below:
> > + *
> > + *     Redistribution and use in source and binary forms, with or
> > + *     without modification, are permitted provided that the following
> > + *     conditions are met:
> > + *
> > + *      - Redistributions of source code must retain the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer.
> > + *
> > + *      - Redistributions in binary form must reproduce the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer in the documentation and/or other materials
> > + *        provided with the distribution.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
> WARRANTIES OF
> > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
> COPYRIGHT HOLDERS
> > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
> AN
> > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
> OR IN
> > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
> > + * SOFTWARE.
> > + */
> 
> [...]
> > +/* Several utility macros to get/set the register fields. */
> > +#define TMFIFO_GET_FIELD(reg, field) \
> > +	(((reg) >> field##_SHIFT) & ((1UL << field##_WIDTH) - 1))
> > +
> > +#define TMFIFO_SET_FIELD(reg, field, value) ({ \
> > +	u64 _mask = ((1UL << field##_WIDTH) - 1) << field##_SHIFT; \
> > +	((reg) & ~_mask) | (((value) << field##_SHIFT) & _mask); \
> > +})
> 
> There's no need to reinvent <linux/bitfield.h>

Updated in patch v2-1/4.

> 
> [...]
> > +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> > +MODULE_AUTHOR("Mellanox Technologies, Ltd");
> > +MODULE_LICENSE("GPL");
> 
> ...and this implies yet another different license (since it indicates
> "GPLv2 or later")

Fixed in patch v2-1/4.

> 
> > +MODULE_VERSION("0.7");
> > diff --git a/drivers/soc/mellanox/tmfifo_regs.h
> b/drivers/soc/mellanox/tmfifo_regs.h
> > new file mode 100644
> > index 0000000..b07f353
> > --- /dev/null
> > +++ b/drivers/soc/mellanox/tmfifo_regs.h
> > @@ -0,0 +1,112 @@
> > +// SPDX-License-Identifier: GPL-2.0
> 
> Again, this doesn't match the included text. Also, the SPDX comment
> style is /* */ for headers.

Updated in patch v2-1/4.

> 
> > +/*
> > + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> > + *
> > + * This software is available to you under a choice of one of two
> > + * licenses.  You may choose to be licensed under the terms of the GNU
> > + * General Public License (GPL) Version 2, available from the file
> > + * COPYING in the main directory of this source tree, or the
> > + * OpenIB.org BSD license below:
> > + *
> > + *     Redistribution and use in source and binary forms, with or
> > + *     without modification, are permitted provided that the following
> > + *     conditions are met:
> > + *
> > + *      - Redistributions of source code must retain the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer.
> > + *
> > + *      - Redistributions in binary form must reproduce the above
> > + *        copyright notice, this list of conditions and the following
> > + *        disclaimer in the documentation and/or other materials
> > + *        provided with the distribution.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
> KIND,
> > + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
> WARRANTIES OF
> > + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> > + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
> COPYRIGHT HOLDERS
> > + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
> AN
> > + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
> OR IN
> > + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN THE
> > + * SOFTWARE.
> > + */
> 
> [...]
> > +#ifdef __ASSEMBLER__
> > +#define _64bit(x) x
> > +#else /* __ASSEMBLER__ */
> > +#ifdef __tile__
> > +#define _64bit(x) x ## UL
> > +#else /* __tile__ */
> > +#define _64bit(x) x ## ULL
> > +#endif /* __tile__ */
> > +#endif /* __ASSEMBLER */
> > +
> > +#ifdef __KERNEL__
> > +#include <linux/types.h>
> > +#else
> > +#include <stdint.h>
> > +#endif
> > +
> > +#ifndef __DOXYGEN__
> 
> Given that this is a private header under drivers/, is any of that lot
> necessary?

Simplified and updated it in patch v2-1/4.

> 
> Robin.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 20:17   ` [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-05-31  3:43     ` Rob Herring
  2018-06-01 14:31       ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Rob Herring @ 2018-05-31  3:43 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, May 25, 2018 at 04:17:15PM -0400, Liming Sun wrote:

Commit msg?

> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> 
> diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> new file mode 100644
> index 0000000..0a362f5
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> @@ -0,0 +1,20 @@
> +* Mellanox BlueField SoC TmFifo
> +
> +BlueField TmFifo provides a shared FIFO between the target and the
> +external host machine, which can be accessed via USB or PCIe.

A FIFO for what? I'd like to find a better spot than bindings/soc/
> +
> +Required properties:
> +
> +- compatible:	Should be "mellanox,bf-tmfifo"
> +- reg:		Physical base address and length of Rx/Tx block
> +- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
> +		Tx low water mark, Tx high water mark respectively.
> +
> +Example:
> +
> +tmfifo at 800a20 {
> +	compatible = "mellanox,bf-tmfifo";
> +	reg = <0x00800a20 0x00000018
> +	       0x00800a40 0x00000018>;
> +	interrupts = <41, 42, 43, 44>;
> +};
> -- 
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe devicetree" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-31  3:43     ` Rob Herring
@ 2018-06-01 14:31       ` Liming Sun
  0 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks for the comments. Please see response inline.

> -----Original Message-----
> From: Rob Herring [mailto:robh at kernel.org]
> Sent: Wednesday, May 30, 2018 11:44 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; Arnd Bergmann <arnd@arndb.de>;
> David Woods <dwoods@mellanox.com>; Robin Murphy
> <robin.murphy@arm.com>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for
> Mellanox BlueField SoC
> 
> On Fri, May 25, 2018 at 04:17:15PM -0400, Liming Sun wrote:
> 
> Commit msg?

Updated in patch v3 3/4.

> 
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  .../devicetree/bindings/soc/mellanox/tmfifo.txt      | 20
> ++++++++++++++++++++
> >  1 file changed, 20 insertions(+)
> >  create mode 100644
> Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> >
> > diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > new file mode 100644
> > index 0000000..0a362f5
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > @@ -0,0 +1,20 @@
> > +* Mellanox BlueField SoC TmFifo
> > +
> > +BlueField TmFifo provides a shared FIFO between the target and the
> > +external host machine, which can be accessed via USB or PCIe.
> 
> A FIFO for what? I'd like to find a better spot than bindings/soc/

This is a generic HW FIFO which can be accessed by the SoC SW and external Host machine to exchange any data. In the current Linux tmfifo driver, this FIFO has been used (demuxed) to implement a virtual console and network interface based on virtio framework.  

Updated the tmfifo.txt in patch v3 3/4 with the above explanation. Please advise if there is a better place for this file.
 
> > +
> > +Required properties:
> > +
> > +- compatible:	Should be "mellanox,bf-tmfifo"
> > +- reg:		Physical base address and length of Rx/Tx block
> > +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> mark
> > +		Tx low water mark, Tx high water mark respectively.
> > +
> > +Example:
> > +
> > +tmfifo at 800a20 {
> > +	compatible = "mellanox,bf-tmfifo";
> > +	reg = <0x00800a20 0x00000018
> > +	       0x00800a40 0x00000018>;
> > +	interrupts = <41, 42, 43, 44>;
> > +};
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe devicetree" in
> > the body of a message to majordomo at vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (4 preceding siblings ...)
  2018-05-25 20:17 ` [PATCH v2 " Liming Sun
@ 2018-06-01 14:31 ` Liming Sun
  2018-06-01 14:31   ` [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
                     ` (2 more replies)
  2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (13 subsequent siblings)
  19 siblings, 3 replies; 70+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 4052357..868163f 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-$(CONFIG_ARCH_QCOM)		+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  2018-06-01 14:31   ` [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
  2018-06-01 14:31   ` [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 2b1535c..74ad03f 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 1c98939..842f607 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
  2018-06-01 14:31   ` [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  2018-06-11 18:19     ` Rob Herring
  2018-06-01 14:31   ` [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
  2018-06-01 14:31   ` [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
  2018-06-01 14:31   ` [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-06-01 14:31   ` Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-06-01 14:31 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 58b9861..85d5639 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1636,6 +1636,14 @@ L:	linux-mediatek at lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-06-01 14:31   ` [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-06-11 18:19     ` Rob Herring
  0 siblings, 0 replies; 70+ messages in thread
From: Rob Herring @ 2018-06-11 18:19 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jun 01, 2018 at 10:31:06AM -0400, Liming Sun wrote:
> Add devicetree bindings for the TmFifo which is found on Mellanox
> BlueField SoCs.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

Reviewed-by: Rob Herring <robh@kernel.org>

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (5 preceding siblings ...)
  2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-10-24 17:55 ` Liming Sun
  2018-10-24 17:55   ` [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
                     ` (3 more replies)
  2018-10-31 18:09 ` [PATCH v5 1/5] " Liming Sun
                   ` (12 subsequent siblings)
  19 siblings, 4 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1239 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   75 +++
 6 files changed, 1339 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..5647cb6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
+
+#define TMFIFO_SET_FIELD(reg, mask, value) \
+	((reg & ~mask) | FIELD_PREP(mask, value))
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* Use a timer for house-keeping. */
+static int tmfifo_timer_interval = HZ / 10;
+
+/* Global lock. */
+static struct mutex tmfifo_lock;
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
+
+struct tmfifo;
+
+/* A flag to indicate TmFifo ready. */
+static bool tmfifo_ready;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(tmfifo_spin_lock);
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
+	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
+	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
+	((vdev)->tx_head - (vdev)->tx_tail - 8))
+
+#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
+	(vdev)->tx_tail += (len); \
+	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
+	(vdev)->tx_head += (len); \
+	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
+		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
+} while (0)
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Forward declaration. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc);
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!tmfifo_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > TMFIFO_VDEV_TX_BUF_AVAIL(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			TMFIFO_VDEV_TX_BUF_PUSH(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		TMFIFO_VDEV_TX_BUF_PUSH(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = TMFIFO_GET_FIELD(sts,
+						TMFIFO_RX_STS__COUNT_MASK);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					TMFIFO_GET_FIELD(sts,
+						TMFIFO_TX_STS__COUNT_MASK);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&tmfifo_spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&tmfifo_spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				TMFIFO_VDEV_TX_BUF_POP(cons, sizeof(u64));
+				spin_unlock_irqrestore(&tmfifo_spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&tmfifo_spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&tmfifo_spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	tmfifo_ready = false;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_TX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__LWM_MASK,
+			       fifo->tx_fifo_size / 2);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_TX_CTL__HWM_MASK,
+			       fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		TMFIFO_GET_FIELD(ctl, TMFIFO_RX_CTL__MAX_ENTRIES_MASK);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = TMFIFO_SET_FIELD(ctl, TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	tmfifo_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	mutex_init(&tmfifo_lock);
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..f42c9d6
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  2018-10-25 15:38     ` Arnd Bergmann
  2018-10-24 17:55   ` [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac3..aeb67c2 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f9a186f..508cb9d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
  2018-10-24 17:55   ` [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  2018-10-25 15:32     ` Arnd Bergmann
  2018-10-24 17:55   ` [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2018-10-25 15:57   ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Arnd Bergmann
  3 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
  2018-10-24 17:55   ` [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
  2018-10-24 17:55   ` [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-10-24 17:55   ` Liming Sun
  2018-10-25 15:57   ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Arnd Bergmann
  3 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-24 17:55 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c78feb0..07f7c7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,14 @@ L:	linux-mediatek at lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-24 17:55   ` [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-10-25 15:32     ` Arnd Bergmann
  2018-10-26 19:36       ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:32 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> Add devicetree bindings for the TmFifo which is found on Mellanox
> BlueField SoCs.
>
> Reviewed-by: Rob Herring <robh@kernel.org>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23
> ++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>  create mode 100644
> Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
>
> diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> new file mode 100644
> index 0000000..8a13fa6
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> @@ -0,0 +1,23 @@
> +* Mellanox BlueField SoC TmFifo
> +
> +BlueField TmFifo provides a shared FIFO between the target and the
> +external host machine, which can be accessed by external host via
> +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> +to implement virtual console and network interface based on the virtio
> +framework.
> +
> +Required properties:
> +
> +- compatible:	Should be "mellanox,bf-tmfifo"
> +- reg:		Physical base address and length of Rx/Tx block
> +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> mark
> +		Tx low water mark, Tx high water mark respectively.


This sounds like it might fit into the mailbox subsystem, and perhaps
it should use the mailbox DT bindings. Have you had a look at that?

       Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-24 17:55   ` [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-10-25 15:38     ` Arnd Bergmann
  2018-10-26 19:18       ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:38 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit introduces config option for Mellanox BlueField SoC,
> which can be used to build the SoC specific drivers, and enables
> it by default in configs/defconfig.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  arch/arm64/Kconfig.platforms | 6 ++++++
>  arch/arm64/configs/defconfig | 1 +
>  2 files changed, 7 insertions(+)

Reviewed-by: Arnd Bergmann <arnd@arndb.de>

I'm sorry for missing your series in the past. We should definitely merge
the platform support soon. Do you also have device tree files for reference
systems or even production hardware?

I need to have a separate look at the fifo driver.

Unfortunately, you have sent these patches during the merge window,
which is the time during which we don't pick up new work. Let's plan
to pick these up after 4.20-rc1, and please resend to arm at kernel.org
if we manage to forget about it again.

      Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                     ` (2 preceding siblings ...)
  2018-10-24 17:55   ` [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
@ 2018-10-25 15:57   ` Arnd Bergmann
  2018-10-26 18:24     ` Liming Sun
  2018-12-04 22:12     ` Liming Sun
  3 siblings, 2 replies; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-25 15:57 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

I definitely like the idea of using virtio-net and virtio-console here,
this is a great way of reusing the existing high-level drivers,
and i similar in concept (but also much simpler) to what we
have in drivers/misc/mic/ for another Linux-running machine that
can be a PCIe add-on card.

Have you also posted the other half of this driver? I'd like to see
how it all fits together.

A few style comments:

> +
> +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> +
> +#define TMFIFO_SET_FIELD(reg, mask, value) \
> +	((reg & ~mask) | FIELD_PREP(mask, value))

I think it would be nicer to use FIELD_GET/FIELD_PREP
in the code directly, and avoid adding extra wrappers around them.

> +/* Vring size. */
> +#define TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* Use a timer for house-keeping. */
> +static int tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static struct mutex tmfifo_lock;

Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the
initialization call.

> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> +module_param(tmfifo_vring_size, int, 0444);
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> +
> +struct tmfifo;
> +
> +/* A flag to indicate TmFifo ready. */
> +static bool tmfifo_ready;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Spin lock. */
> +static DEFINE_SPINLOCK(tmfifo_spin_lock);

Generally speaking, it's nicer to write a driver in a way that avoids
global variables and make the flags and locks all members of a
device specific structure.

> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};

I suppose you did this to keep the driver simple, but it seems a
little inflexible
to only support two specific device types. Wouldn't we also want e.g. 9pfs
or virtio_blk in some configurations?

> +
> +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
> +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> +
> +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> +	(vdev)->tx_tail += (len); \
> +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
> +} while (0)
> +
> +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> +	(vdev)->tx_head += (len); \
> +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
> +} while (0)

It would be nicer to turn these into inline functions rather than macros.

> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	int irq[TM_IRQ_CNT];		/* irq numbers */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if
> configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> 0x01};
> +

Is a predefined MAC address better than a random one here?

For DT based systems, we tend to also call of_get_mac_address()
in order to allow setting a unique address from firmware.

> +/* Forward declaration. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
> +static void tmfifo_release_pkt(struct virtio_device *vdev,
> +			       struct tmfifo_vring *vring,
> +			       struct vring_desc **desc);

Try to avoid forward declarations by reordering the functions according
to how they get called.

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> +{
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}

Maybe using a request_threaded_irq() would be a better way to defer
the handler into IRQ context.

        Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-25 15:57   ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Arnd Bergmann
@ 2018-10-26 18:24     ` Liming Sun
  2018-10-26 18:35       ` Arnd Bergmann
  2018-12-04 22:12     ` Liming Sun
  1 sibling, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-26 18:24 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Arnd for the comments! Please see the response inline.

- Liming

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:58 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> Soc
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > This commit adds the TmFifo driver for Mellanox BlueField Soc.
> > TmFifo is a shared FIFO which enables external host machine to
> > exchange data with the SoC via USB or PCIe. The driver is based on
> > virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> I definitely like the idea of using virtio-net and virtio-console here,
> this is a great way of reusing the existing high-level drivers,
> and i similar in concept (but also much simpler) to what we
> have in drivers/misc/mic/ for another Linux-running machine that
> can be a PCIe add-on card.
> 
> Have you also posted the other half of this driver? I'd like to see
> how it all fits together.

I'll add the (x86) host side driver into this patch series v5 as a separate commit.

> 
> A few style comments:
> 
> > +
> > +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> > +
> > +#define TMFIFO_SET_FIELD(reg, mask, value) \
> > +	((reg & ~mask) | FIELD_PREP(mask, value))
> 
> I think it would be nicer to use FIELD_GET/FIELD_PREP
> in the code directly, and avoid adding extra wrappers around them.

Will update it in patch v5.

> 
> > +/* Vring size. */
> > +#define TMFIFO_VRING_SIZE			1024
> > +
> > +/* Console Tx buffer size. */
> > +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> > +
> > +/* Use a timer for house-keeping. */
> > +static int tmfifo_timer_interval = HZ / 10;
> > +
> > +/* Global lock. */
> > +static struct mutex tmfifo_lock;
> 
> Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the
> initialization call.

Will update it in patch v5.

> 
> > +/* Virtio ring size. */
> > +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> > +module_param(tmfifo_vring_size, int, 0444);
> > +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> > +
> > +struct tmfifo;
> > +
> > +/* A flag to indicate TmFifo ready. */
> > +static bool tmfifo_ready;
> > +
> > +/* Virtual devices sharing the TM FIFO. */
> > +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> > +
> > +/* Spin lock. */
> > +static DEFINE_SPINLOCK(tmfifo_spin_lock);
> 
> Generally speaking, it's nicer to write a driver in a way that avoids
> global variables and make the flags and locks all members of a
> device specific structure.

Will update it in patch v5.

> 
> > +struct tmfifo_vdev {
> > +	struct virtio_device vdev;	/* virtual device */
> > +	u8 status;
> > +	u64 features;
> > +	union {				/* virtio config space */
> > +		struct virtio_console_config cons;
> > +		struct virtio_net_config net;
> > +	} config;
> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> > +	u8 *tx_buf;			/* tx buffer */
> > +	u32 tx_head;			/* tx buffer head */
> > +	u32 tx_tail;			/* tx buffer tail */
> > +};
> 
> I suppose you did this to keep the driver simple, but it seems a
> little inflexible
> to only support two specific device types. Wouldn't we also want e.g. 9pfs
> or virtio_blk in some configurations?

We could definitely add more when needed, which should be straightforward
due to the virtio framework. For now only network and console are supported
and ben been verified. 

> 
> > +
> > +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> > +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> > +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head))
> : \
> > +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> > +
> > +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> > +	(vdev)->tx_tail += (len); \
> > +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> > +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \
> > +} while (0)
> > +
> > +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> > +	(vdev)->tx_head += (len); \
> > +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> > +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \
> > +} while (0)
> 
> It would be nicer to turn these into inline functions rather than macros.

Will update it in patch v5.

> 
> > +/* TMFIFO device structure */
> > +struct tmfifo {
> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> > +	struct platform_device *pdev;	/* platform device */
> > +	struct mutex lock;
> > +	void __iomem *rx_base;		/* mapped register base */
> > +	void __iomem *tx_base;		/* mapped register base */
> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> > +	unsigned long pend_events;	/* pending bits for deferred process
> */
> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
> > +	struct work_struct work;	/* work struct for deferred process
> */
> > +	struct timer_list timer;	/* keepalive timer */
> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> > +};
> > +
> > +union tmfifo_msg_hdr {
> > +	struct {
> > +		u8 type;		/* message type */
> > +		__be16 len;		/* payload length */
> > +		u8 unused[5];		/* reserved, set to 0 */
> > +	} __packed;
> > +	u64 data;
> > +};
> > +
> > +/*
> > + * Default MAC.
> > + * This MAC address will be read from EFI persistent variable if
> > configured.
> > + * It can also be reconfigured with standard Linux tools.
> > + */
> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> > 0x01};
> > +
> 
> Is a predefined MAC address better than a random one here?
> 
> For DT based systems, we tend to also call of_get_mac_address()
> in order to allow setting a unique address from firmware.

A predefined default MAC address is simpler in this case, which makes 
DHCP or PXE boot easier in development environment. 

For production, the MAC address is stored in persistent UEFI variable 
on the eeprom, which is read in function tmfifo_get_cfg_mac() which 
calls efi.get_variable() to get the MAC address.

> 
> > +/* Forward declaration. */
> > +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx);
> > +static void tmfifo_release_pkt(struct virtio_device *vdev,
> > +			       struct tmfifo_vring *vring,
> > +			       struct vring_desc **desc);
> 
> Try to avoid forward declarations by reordering the functions according
> to how they get called.

Will update it in patch v5.

> 
> > +
> > +/* Interrupt handler. */
> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > +{
> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> > +	struct tmfifo *fifo = dev_id - i;
> > +
> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > +		schedule_work(&fifo->work);
> > +
> > +	return IRQ_HANDLED;
> > +}
> 
> Maybe using a request_threaded_irq() would be a better way to defer
> the handler into IRQ context.

Not sure if I understand this comment correctly... In this case, the implemented handler 
has some mutex_lock() used, which tries to make the logic simple since multiple services 
(network & console) are sharing the same fifo. Thus schedule_work() is used.

> 
>         Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-26 18:24     ` Liming Sun
@ 2018-10-26 18:35       ` Arnd Bergmann
  2018-10-29 14:17         ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-26 18:35 UTC (permalink / raw)
  To: linux-arm-kernel

On 10/26/18, Liming Sun <lsun@mellanox.com> wrote:
>> -----Original Message-----
>> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
>> Behalf Of Arnd Bergmann
>> Sent: Thursday, October 25, 2018 11:58 AM
>> To: Liming Sun <lsun@mellanox.com>
>> Cc: Olof Johansson <olof@lixom.net>; David Woods
>> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
>> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
>> kernel at lists.infradead.org
>> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
>> Soc
>>
>> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
>> > +struct tmfifo_vdev {
>> > +	struct virtio_device vdev;	/* virtual device */
>> > +	u8 status;
>> > +	u64 features;
>> > +	union {				/* virtio config space */
>> > +		struct virtio_console_config cons;
>> > +		struct virtio_net_config net;
>> > +	} config;
>> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
>> > +	u8 *tx_buf;			/* tx buffer */
>> > +	u32 tx_head;			/* tx buffer head */
>> > +	u32 tx_tail;			/* tx buffer tail */
>> > +};
>>
>> I suppose you did this to keep the driver simple, but it seems a
>> little inflexible
>> to only support two specific device types. Wouldn't we also want e.g.
>> 9pfs
>> or virtio_blk in some configurations?
>
> We could definitely add more when needed, which should be straightforward
> due to the virtio framework. For now only network and console are supported
> and ben been verified.

Wouldn't that require a new PCI ID to have the driver on the host
side match what this side does? I guess I'll see when you post the
other driver.

>> > +/* TMFIFO device structure */
>> > +struct tmfifo {
>> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
>> > +	struct platform_device *pdev;	/* platform device */
>> > +	struct mutex lock;
>> > +	void __iomem *rx_base;		/* mapped register base */
>> > +	void __iomem *tx_base;		/* mapped register base */
>> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
>> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
>> > +	unsigned long pend_events;	/* pending bits for deferred process
>> */
>> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
>> > +	struct work_struct work;	/* work struct for deferred process
>> */
>> > +	struct timer_list timer;	/* keepalive timer */
>> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
>> > +};
>> > +
>> > +union tmfifo_msg_hdr {
>> > +	struct {
>> > +		u8 type;		/* message type */
>> > +		__be16 len;		/* payload length */
>> > +		u8 unused[5];		/* reserved, set to 0 */
>> > +	} __packed;
>> > +	u64 data;
>> > +};
>> > +
>> > +/*
>> > + * Default MAC.
>> > + * This MAC address will be read from EFI persistent variable if
>> > configured.
>> > + * It can also be reconfigured with standard Linux tools.
>> > + */
>> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
>> > 0x01};
>> > +
>>
>> Is a predefined MAC address better than a random one here?
>>
>> For DT based systems, we tend to also call of_get_mac_address()
>> in order to allow setting a unique address from firmware.
>
> A predefined default MAC address is simpler in this case, which makes
> DHCP or PXE boot easier in development environment.
>
> For production, the MAC address is stored in persistent UEFI variable
> on the eeprom, which is read in function tmfifo_get_cfg_mac() which
> calls efi.get_variable() to get the MAC address.

Ok, fair enough. Generally speaking the recommended way of doing
this is to update the DT properties from eeprom when a network
driver has no way to store the mac address itself, but I suppose
you always have UEFI anyway, and this also makes it work in
the same way across both DT and ACPI.

>> > +/* Interrupt handler. */
>> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
>> > +{
>> > +	int i = (uintptr_t)dev_id % sizeof(void *);
>> > +	struct tmfifo *fifo = dev_id - i;
>> > +
>> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
>> > +		schedule_work(&fifo->work);
>> > +
>> > +	return IRQ_HANDLED;
>> > +}
>>
>> Maybe using a request_threaded_irq() would be a better way to defer
>> the handler into IRQ context.
>
> Not sure if I understand this comment correctly... In this case, the
> implemented handler
> has some mutex_lock() used, which tries to make the logic simple since
> multiple services
> (network & console) are sharing the same fifo. Thus schedule_work() is
> used.

schedule_work() and threaded IRQs are just two different ways of deferring
into process context where you can do the mutex_lock(). The effect is
almost the same, but work queues can be delayed for a substantial
amount of time depending on what other work functions have been
queued at the same time, and request_threaded_irq() is the more normal
way of doing this specifically for an IRQ handler, probably saving a couple
of lines of source code.

If you have any kind of real-time requirement, you can also assign a
specific realtime priority to that interrupt thread.

       Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-25 15:38     ` Arnd Bergmann
@ 2018-10-26 19:18       ` Liming Sun
  2018-10-26 19:32         ` Arnd Bergmann
  0 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-26 19:18 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Arnd for the comments!  Please see my response inline.

- Liming

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:38 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > This commit introduces config option for Mellanox BlueField SoC,
> > which can be used to build the SoC specific drivers, and enables
> > it by default in configs/defconfig.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  arch/arm64/Kconfig.platforms | 6 ++++++
> >  arch/arm64/configs/defconfig | 1 +
> >  2 files changed, 7 insertions(+)
> 
> Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> 
> I'm sorry for missing your series in the past. We should definitely merge
> the platform support soon. Do you also have device tree files for reference
> systems or even production hardware?

We have obsoleted the device tree, and mainly support ACPI now
on Reference/Production HW. Below is the TMFIFO definition in the ACPI
DSDT table.

    // RShim TMFIFO
    Device(RSH0) {
      Name(_HID, "MLNXBF01")
      Name(_UID, Zero)
      Name(_CCA, 1)
      Name(_CRS, ResourceTemplate() {
        Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
        Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
        Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
          { BF1_RSH0_TM_HTT_LWM_INT,
            BF1_RSH0_TM_HTT_HWM_INT,
            BF1_RSH0_TM_TTH_LWM_INT,
            BF1_RSH0_TM_TTH_HWM_INT
          }
      })
    }

The full ACPI implementation can be found in the 1.0 release:
http://www.mellanox.com/downloads/BlueField/BlueField-1.0.0.10521/BlueField-1.0.0.10521.tar.xz
Inside this tarball, we can see the "src/edk2/" directory which has the edk2 patch file including all the ACPI implementation.

> 
> I need to have a separate look at the fifo driver.
> 
> Unfortunately, you have sent these patches during the merge window,
> which is the time during which we don't pick up new work. Let's plan
> to pick these up after 4.20-rc1, and please resend to arm at kernel.org
> if we manage to forget about it again.
> 
>       Arnd

Thanks for the information. I'll resend it to make sure not to miss the date.

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-26 19:18       ` Liming Sun
@ 2018-10-26 19:32         ` Arnd Bergmann
  2018-10-29 14:58           ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-26 19:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 26, 2018 at 9:18 PM Liming Sun <lsun@mellanox.com> wrote:
> > -----Original Message-----
> > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > Behalf Of Arnd Bergmann
> > Sent: Thursday, October 25, 2018 11:38 AM
> > To: Liming Sun <lsun@mellanox.com>
> > Cc: Olof Johansson <olof@lixom.net>; David Woods
> > <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> > soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> > kernel at lists.infradead.org
> > Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> >
> > On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > > This commit introduces config option for Mellanox BlueField SoC,
> > > which can be used to build the SoC specific drivers, and enables
> > > it by default in configs/defconfig.
> > >
> > > Reviewed-by: David Woods <dwoods@mellanox.com>
> > > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > > ---
> > >  arch/arm64/Kconfig.platforms | 6 ++++++
> > >  arch/arm64/configs/defconfig | 1 +
> > >  2 files changed, 7 insertions(+)
> >
> > Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> >
> > I'm sorry for missing your series in the past. We should definitely merge
> > the platform support soon. Do you also have device tree files for reference
> > systems or even production hardware?
>
> We have obsoleted the device tree, and mainly support ACPI now
> on Reference/Production HW. Below is the TMFIFO definition in the ACPI
> DSDT table.
>
>     // RShim TMFIFO
>     Device(RSH0) {
>       Name(_HID, "MLNXBF01")
>       Name(_UID, Zero)
>       Name(_CCA, 1)
>       Name(_CRS, ResourceTemplate() {
>         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
>         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
>         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
>           { BF1_RSH0_TM_HTT_LWM_INT,
>             BF1_RSH0_TM_HTT_HWM_INT,
>             BF1_RSH0_TM_TTH_LWM_INT,
>             BF1_RSH0_TM_TTH_HWM_INT
>           }
>       })
>     }
>
> The full ACPI implementation can be found in the 1.0 release:
> http://www.mellanox.com/downloads/BlueField/BlueField-1.0.0.10521/BlueField-1.0.0.10521.tar.xz
> Inside this tarball, we can see the "src/edk2/" directory which has the edk2 patch file including all the ACPI implementation.

It would be nice if you could still include the dts sources in the submission.
Since this is not a classic server hardware, DT is probably more suitable
here, and there are likely use cases that won't work with ACPI, so it's
good to have a starting point for your users if they need to override
the ACPI tables with a DT, or build systems with a simpler boot loader.

       Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-25 15:32     ` Arnd Bergmann
@ 2018-10-26 19:36       ` Liming Sun
  2018-10-26 20:33         ` Arnd Bergmann
  0 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-26 19:36 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks Arnd for the comments! Please see the response inline.

- Liming

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Thursday, October 25, 2018 11:33 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox
> BlueField SoC
> 
> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > Add devicetree bindings for the TmFifo which is found on Mellanox
> > BlueField SoCs.
> >
> > Reviewed-by: Rob Herring <robh@kernel.org>
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23
> > ++++++++++++++++++++++
> >  1 file changed, 23 insertions(+)
> >  create mode 100644
> > Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> >
> > diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > new file mode 100644
> > index 0000000..8a13fa6
> > --- /dev/null
> > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > @@ -0,0 +1,23 @@
> > +* Mellanox BlueField SoC TmFifo
> > +
> > +BlueField TmFifo provides a shared FIFO between the target and the
> > +external host machine, which can be accessed by external host via
> > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > +to implement virtual console and network interface based on the virtio
> > +framework.
> > +
> > +Required properties:
> > +
> > +- compatible:	Should be "mellanox,bf-tmfifo"
> > +- reg:		Physical base address and length of Rx/Tx block
> > +- interrupts:	The interrupt number of Rx low water mark, Rx high water
> > mark
> > +		Tx low water mark, Tx high water mark respectively.
> 
> 
> This sounds like it might fit into the mailbox subsystem, and perhaps
> it should use the mailbox DT bindings. Have you had a look at that?

This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
Like the response to patch 2/4, ACPI is actually used now instead of device tree.
The TMFIFO definition in the ACPI DSDT table would be something like below.

    // RShim TMFIFO
    Device(RSH0) {
      Name(_HID, "MLNXBF01")
      Name(_UID, Zero)
      Name(_CCA, 1)
      Name(_CRS, ResourceTemplate() {
        Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
        Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
        Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
          { BF1_RSH0_TM_HTT_LWM_INT,
            BF1_RSH0_TM_HTT_HWM_INT,
            BF1_RSH0_TM_TTH_LWM_INT,
            BF1_RSH0_TM_TTH_HWM_INT
          }
      })
    }

Any suggestion how it should be added into Linux Documentation, or maybe I
should just remove this commit from this patch series?

As for the sub-component of this driver, the "soc" might be better fit than the mailbox 
for some reasons. It's a communication between extern machines and the SoC via 
USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features, 
like pushing boot stream, doesn't communicate with the ARM core. The boot stream 
is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.
> 
>        Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-26 19:36       ` Liming Sun
@ 2018-10-26 20:33         ` Arnd Bergmann
  2018-10-29 16:48           ` Liming Sun
  2019-01-24 15:07           ` Liming Sun
  0 siblings, 2 replies; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-26 20:33 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > -----Original Message-----
> > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > > --- /dev/null
> > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > @@ -0,0 +1,23 @@
> > > +* Mellanox BlueField SoC TmFifo
> > > +
> > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > +external host machine, which can be accessed by external host via
> > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > +to implement virtual console and network interface based on the virtio
> > > +framework.
> > > +
> > > +Required properties:
> > > +
> > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > +- reg:             Physical base address and length of Rx/Tx block
> > > +- interrupts:      The interrupt number of Rx low water mark, Rx high water
> > > mark
> > > +           Tx low water mark, Tx high water mark respectively.
> >
> >
> > This sounds like it might fit into the mailbox subsystem, and perhaps
> > it should use the mailbox DT bindings. Have you had a look at that?
>
> This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> Like the response to patch 2/4, ACPI is actually used now instead of device tree.
> The TMFIFO definition in the ACPI DSDT table would be something like below.
>
>     // RShim TMFIFO
>     Device(RSH0) {
>       Name(_HID, "MLNXBF01")
>       Name(_UID, Zero)
>       Name(_CCA, 1)
>       Name(_CRS, ResourceTemplate() {
>         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
>         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
>         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
>           { BF1_RSH0_TM_HTT_LWM_INT,
>             BF1_RSH0_TM_HTT_HWM_INT,
>             BF1_RSH0_TM_TTH_LWM_INT,
>             BF1_RSH0_TM_TTH_HWM_INT
>           }
>       })
>     }
>
> Any suggestion how it should be added into Linux Documentation, or maybe I
> should just remove this commit from this patch series?

Maybe the best way here would be to not use ACPI for the case
where bluefin is integrated into a PCIe endpoint, since ACPI is
not as flexible here and generally relies on having an SBSA
compliant hardware that you no longer have if you require
random platform devices for booting from and for your console.

For the case where a bluefin SoC is used in a standalone system,
having ACPI makes more sense, as that lets you install Red Hat
Linux or other operating systems that rely on SBBR and SBSA.

> As for the sub-component of this driver, the "soc" might be better fit than the mailbox
> for some reasons. It's a communication between extern machines and the SoC via
> USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features,
> like pushing boot stream, doesn't communicate with the ARM core. The boot stream
> is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.

Right, the drivers/mailbox subsystem was not the right idea here,
I noticed that myself after actually reading the driver. Drivers/soc
may also not be the best fit, since this is not really about it being
a SoC, but rather a way to encapsulate virtual devices. The
mic driver I mentioned is in drivers/misc, but I don't like to add stuff
there if we can avoid it.

drivers/virtio, drivers/bus or drivers/mfd might also be an option that
could fit better than drivers/soc, or you could have your own subdir
below drivers/ as some others do. Finally, drivers/platform/mellanox
might be a reasonable choice, and it would let you keep both sides
of the driver in one place.

       Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-26 18:35       ` Arnd Bergmann
@ 2018-10-29 14:17         ` Liming Sun
  2018-10-29 14:52           ` Arnd Bergmann
  0 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-29 14:17 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks. Please see my response inline.

> -----Original Message-----
> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> Behalf Of Arnd Bergmann
> Sent: Friday, October 26, 2018 2:35 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> kernel at lists.infradead.org
> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> Soc
> 
> On 10/26/18, Liming Sun <lsun@mellanox.com> wrote:
> >> -----Original Message-----
> >> From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> >> Behalf Of Arnd Bergmann
> >> Sent: Thursday, October 25, 2018 11:58 AM
> >> To: Liming Sun <lsun@mellanox.com>
> >> Cc: Olof Johansson <olof@lixom.net>; David Woods
> >> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>;
> arm-
> >> soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> >> kernel at lists.infradead.org
> >> Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField
> >> Soc
> >>
> >> On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> >> > +struct tmfifo_vdev {
> >> > +	struct virtio_device vdev;	/* virtual device */
> >> > +	u8 status;
> >> > +	u64 features;
> >> > +	union {				/* virtio config space */
> >> > +		struct virtio_console_config cons;
> >> > +		struct virtio_net_config net;
> >> > +	} config;
> >> > +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> >> > +	u8 *tx_buf;			/* tx buffer */
> >> > +	u32 tx_head;			/* tx buffer head */
> >> > +	u32 tx_tail;			/* tx buffer tail */
> >> > +};
> >>
> >> I suppose you did this to keep the driver simple, but it seems a
> >> little inflexible
> >> to only support two specific device types. Wouldn't we also want e.g.
> >> 9pfs
> >> or virtio_blk in some configurations?
> >
> > We could definitely add more when needed, which should be
> straightforward
> > due to the virtio framework. For now only network and console are
> supported
> > and ben been verified.
> 
> Wouldn't that require a new PCI ID to have the driver on the host
> side match what this side does? I guess I'll see when you post the
> other driver.

Yes, the PCI ID is in the host side driver which will be included in patch v5.

> 
> >> > +/* TMFIFO device structure */
> >> > +struct tmfifo {
> >> > +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> >> > +	struct platform_device *pdev;	/* platform device */
> >> > +	struct mutex lock;
> >> > +	void __iomem *rx_base;		/* mapped register base */
> >> > +	void __iomem *tx_base;		/* mapped register base */
> >> > +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> >> > +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> >> > +	unsigned long pend_events;	/* pending bits for deferred process
> >> */
> >> > +	int irq[TM_IRQ_CNT];		/* irq numbers */
> >> > +	struct work_struct work;	/* work struct for deferred process
> >> */
> >> > +	struct timer_list timer;	/* keepalive timer */
> >> > +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> >> > +};
> >> > +
> >> > +union tmfifo_msg_hdr {
> >> > +	struct {
> >> > +		u8 type;		/* message type */
> >> > +		__be16 len;		/* payload length */
> >> > +		u8 unused[5];		/* reserved, set to 0 */
> >> > +	} __packed;
> >> > +	u64 data;
> >> > +};
> >> > +
> >> > +/*
> >> > + * Default MAC.
> >> > + * This MAC address will be read from EFI persistent variable if
> >> > configured.
> >> > + * It can also be reconfigured with standard Linux tools.
> >> > + */
> >> > +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> >> > 0x01};
> >> > +
> >>
> >> Is a predefined MAC address better than a random one here?
> >>
> >> For DT based systems, we tend to also call of_get_mac_address()
> >> in order to allow setting a unique address from firmware.
> >
> > A predefined default MAC address is simpler in this case, which makes
> > DHCP or PXE boot easier in development environment.
> >
> > For production, the MAC address is stored in persistent UEFI variable
> > on the eeprom, which is read in function tmfifo_get_cfg_mac() which
> > calls efi.get_variable() to get the MAC address.
> 
> Ok, fair enough. Generally speaking the recommended way of doing
> this is to update the DT properties from eeprom when a network
> driver has no way to store the mac address itself, but I suppose
> you always have UEFI anyway, and this also makes it work in
> the same way across both DT and ACPI.

Yes, we always have UEFI available.

> 
> >> > +/* Interrupt handler. */
> >> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> >> > +{
> >> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> >> > +	struct tmfifo *fifo = dev_id - i;
> >> > +
> >> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> >> > +		schedule_work(&fifo->work);
> >> > +
> >> > +	return IRQ_HANDLED;
> >> > +}
> >>
> >> Maybe using a request_threaded_irq() would be a better way to defer
> >> the handler into IRQ context.
> >
> > Not sure if I understand this comment correctly... In this case, the
> > implemented handler
> > has some mutex_lock() used, which tries to make the logic simple since
> > multiple services
> > (network & console) are sharing the same fifo. Thus schedule_work() is
> > used.
> 
> schedule_work() and threaded IRQs are just two different ways of deferring
> into process context where you can do the mutex_lock(). The effect is
> almost the same, but work queues can be delayed for a substantial
> amount of time depending on what other work functions have been
> queued at the same time, and request_threaded_irq() is the more normal
> way of doing this specifically for an IRQ handler, probably saving a couple
> of lines of source code.
> 
> If you have any kind of real-time requirement, you can also assign a
> specific realtime priority to that interrupt thread.

Good information! Currently this FIFO is mainly for mgmt purpose. I'll try the threaded 
IRQs approach to see whether it can be easily converted and make it into the v5 patch.
If not easily, probably a separate commit to improve it later?

> 
>        Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-29 14:17         ` Liming Sun
@ 2018-10-29 14:52           ` Arnd Bergmann
  0 siblings, 0 replies; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-29 14:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 29, 2018 at 3:17 PM Liming Sun <lsun@mellanox.com> wrote:

> > >> > +/* Interrupt handler. */
> > >> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > >> > +{
> > >> > +        int i = (uintptr_t)dev_id % sizeof(void *);
> > >> > +        struct tmfifo *fifo = dev_id - i;
> > >> > +
> > >> > +        if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > >> > +                schedule_work(&fifo->work);
> > >> > +
> > >> > +        return IRQ_HANDLED;
> > >> > +}
> > >>
> > >> Maybe using a request_threaded_irq() would be a better way to defer
> > >> the handler into IRQ context.
> > >
> > > Not sure if I understand this comment correctly... In this case, the
> > > implemented handler
> > > has some mutex_lock() used, which tries to make the logic simple since
> > > multiple services
> > > (network & console) are sharing the same fifo. Thus schedule_work() is
> > > used.
> >
> > schedule_work() and threaded IRQs are just two different ways of deferring
> > into process context where you can do the mutex_lock(). The effect is
> > almost the same, but work queues can be delayed for a substantial
> > amount of time depending on what other work functions have been
> > queued at the same time, and request_threaded_irq() is the more normal
> > way of doing this specifically for an IRQ handler, probably saving a couple
> > of lines of source code.
> >
> > If you have any kind of real-time requirement, you can also assign a
> > specific realtime priority to that interrupt thread.
>
> Good information! Currently this FIFO is mainly for mgmt purpose. I'll try the threaded
> IRQs approach to see whether it can be easily converted and make it into the v5 patch.
> If not easily, probably a separate commit to improve it later?

Sure, no problem. This is not an important change, but I also think it should
be easy to do, in particular as it is meant to simplify the code.

       Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-26 19:32         ` Arnd Bergmann
@ 2018-10-29 14:58           ` Liming Sun
  2018-10-29 15:26             ` Arnd Bergmann
  0 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-10-29 14:58 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks! Please see response inline.

- Liming

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd at arndb.de]
> Sent: Friday, October 26, 2018 3:32 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On Fri, Oct 26, 2018 at 9:18 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > > Behalf Of Arnd Bergmann
> > > Sent: Thursday, October 25, 2018 11:38 AM
> > > To: Liming Sun <lsun@mellanox.com>
> > > Cc: Olof Johansson <olof@lixom.net>; David Woods
> > > <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>;
> arm-
> > > soc <arm@kernel.org>; devicetree at vger.kernel.org; linux-arm-
> > > kernel at lists.infradead.org
> > > Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config
> option
> > >
> > > On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> > > > This commit introduces config option for Mellanox BlueField SoC,
> > > > which can be used to build the SoC specific drivers, and enables
> > > > it by default in configs/defconfig.
> > > >
> > > > Reviewed-by: David Woods <dwoods@mellanox.com>
> > > > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > > > ---
> > > >  arch/arm64/Kconfig.platforms | 6 ++++++
> > > >  arch/arm64/configs/defconfig | 1 +
> > > >  2 files changed, 7 insertions(+)
> > >
> > > Reviewed-by: Arnd Bergmann <arnd@arndb.de>
> > >
> > > I'm sorry for missing your series in the past. We should definitely merge
> > > the platform support soon. Do you also have device tree files for
> reference
> > > systems or even production hardware?
> >
> > We have obsoleted the device tree, and mainly support ACPI now
> > on Reference/Production HW. Below is the TMFIFO definition in the ACPI
> > DSDT table.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > The full ACPI implementation can be found in the 1.0 release:
> > http://www.mellanox.com/downloads/BlueField/BlueField-
> 1.0.0.10521/BlueField-1.0.0.10521.tar.xz
> > Inside this tarball, we can see the "src/edk2/" directory which has the edk2
> patch file including all the ACPI implementation.
> 
> It would be nice if you could still include the dts sources in the submission.
> Since this is not a classic server hardware, DT is probably more suitable
> here, and there are likely use cases that won't work with ACPI, so it's
> good to have a starting point for your users if they need to override
> the ACPI tables with a DT, or build systems with a simpler boot loader.

Sure, I'll keep this DT change for reference. A little explanation that
Mellanox BlueField SOC could be PCIe NIC or standalone device (server)  form which
can do PXE boot, CentOS installation even on the NIC, etc. The same software/driver 
works on both.  DT seems not working well for some requirement/features like 
NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI though
DT support is preserved.

> 
>        Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-29 14:58           ` Liming Sun
@ 2018-10-29 15:26             ` Arnd Bergmann
  2018-10-29 16:09               ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Arnd Bergmann @ 2018-10-29 15:26 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Oct 29, 2018 at 3:58 PM Liming Sun <lsun@mellanox.com> wrote:
> > It would be nice if you could still include the dts sources in the submission.
> > Since this is not a classic server hardware, DT is probably more suitable
> > here, and there are likely use cases that won't work with ACPI, so it's
> > good to have a starting point for your users if they need to override
> > the ACPI tables with a DT, or build systems with a simpler boot loader.
>
> Sure, I'll keep this DT change for reference. A little explanation that
> Mellanox BlueField SOC could be PCIe NIC or standalone device (server)  form which
> can do PXE boot, CentOS installation even on the NIC, etc. The same software/driver
> works on both.

Ok, good.

>  DT seems not working well for some requirement/features like
> NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI though
> DT support is preserved.

This is probably something we need to look at separately, I'm sure we will want
to use NVDIMM with DT based systems in the future. Can you explain what
the DSM is used for here? Is this something that could be done in software
in the NVDIMM hardware specific driver?

       Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
  2018-10-29 15:26             ` Arnd Bergmann
@ 2018-10-29 16:09               ` Liming Sun
  0 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-29 16:09 UTC (permalink / raw)
  To: linux-arm-kernel

Please see my response below.

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd at arndb.de]
> Sent: Monday, October 29, 2018 11:26 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option
> 
> On Mon, Oct 29, 2018 at 3:58 PM Liming Sun <lsun@mellanox.com> wrote:
> > > It would be nice if you could still include the dts sources in the submission.
> > > Since this is not a classic server hardware, DT is probably more suitable
> > > here, and there are likely use cases that won't work with ACPI, so it's
> > > good to have a starting point for your users if they need to override
> > > the ACPI tables with a DT, or build systems with a simpler boot loader.
> >
> > Sure, I'll keep this DT change for reference. A little explanation that
> > Mellanox BlueField SOC could be PCIe NIC or standalone device (server)
> form which
> > can do PXE boot, CentOS installation even on the NIC, etc. The same
> software/driver
> > works on both.
> 
> Ok, good.
> 
> >  DT seems not working well for some requirement/features like
> > NVDIMM (which requires ACPI DSM).  That's why we switched to ACPI
> though
> > DT support is preserved.
> 
> This is probably something we need to look at separately, I'm sure we will
> want
> to use NVDIMM with DT based systems in the future. Can you explain what
> the DSM is used for here? Is this something that could be done in software
> in the NVDIMM hardware specific driver?

The DSM is kind of running logic instead of configuration which is provided by 
system firmware. It's defined in the UEFI spec and supported by Linux. In the
NVDIMM case here, it's used to do Address Range Scrub (ARS), I2C access (such as
reading the DIMM SPD information), runtime health check, etc. 

DSM is kind of requirement by NVDIMM, such as
https://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
https://docs.microsoft.com/en-us/windows-hardware/drivers/storage/-dsm-interface-for-byte-addressable-energy-backed-function-class--function-interface-1-

By following the ACPI DSM definitions, there is no extra drivers needed in 
Linux due to the existing ACPI framework.  Since it's 'standard' APIs in UEFI, 
it has potential to support other OS other than Linux.

Hardware specific driver might be able to do some of the functionality, or
even 'simulate' some of the ACPI implementation or APIs. I guess it might need 
a framework for it which I haven't thought a lot of details yet :)

> 
>        Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-26 20:33         ` Arnd Bergmann
@ 2018-10-29 16:48           ` Liming Sun
  2019-01-24 15:07           ` Liming Sun
  1 sibling, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-29 16:48 UTC (permalink / raw)
  To: linux-arm-kernel

Thanks for the comments! Please see my response/questions inline.

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd at arndb.de]
> Sent: Friday, October 26, 2018 4:34 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods
> <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-
> soc <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM
> <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox
> BlueField SoC
> 
> On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann at gmail.com [mailto:arndbergmann at gmail.com] On
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > > @@ -0,0 +1,23 @@
> > > > +* Mellanox BlueField SoC TmFifo
> > > > +
> > > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > > +external host machine, which can be accessed by external host via
> > > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > > +to implement virtual console and network interface based on the virtio
> > > > +framework.
> > > > +
> > > > +Required properties:
> > > > +
> > > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > > +- reg:             Physical base address and length of Rx/Tx block
> > > > +- interrupts:      The interrupt number of Rx low water mark, Rx high
> water
> > > > mark
> > > > +           Tx low water mark, Tx high water mark respectively.
> > >
> > >
> > > This sounds like it might fit into the mailbox subsystem, and perhaps
> > > it should use the mailbox DT bindings. Have you had a look at that?
> >
> > This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> > Like the response to patch 2/4, ACPI is actually used now instead of device
> tree.
> > The TMFIFO definition in the ACPI DSDT table would be something like
> below.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > Any suggestion how it should be added into Linux Documentation, or maybe
> I
> > should just remove this commit from this patch series?
> 
> Maybe the best way here would be to not use ACPI for the case
> where bluefin is integrated into a PCIe endpoint, since ACPI is
> not as flexible here and generally relies on having an SBSA
> compliant hardware that you no longer have if you require
> random platform devices for booting from and for your console.
> 
> For the case where a bluefin SoC is used in a standalone system,
> having ACPI makes more sense, as that lets you install Red Hat
> Linux or other operating systems that rely on SBBR and SBSA.

A little explanation for this SoC:
In the PCIe case, it's not just an endpoint. It can work in a PCIe "multi-host" mode
which behaves just like standalone system, such as doing PXE boot and CentOS 
or other OS installation on the eMMC and boot from it. It can run fully isolated 
from the x86 host. Below is a link of brief introduction.
http://www.mellanox.com/related-docs/prod_adapter_cards/PB_BlueField_Smart_NIC.pdf

So for now the same SW with ACPI configuration is used on all the boards for simplicity. 
But I think DT could definitely be used on customized board or when needed.

> 
> > As for the sub-component of this driver, the "soc" might be better fit than
> the mailbox
> > for some reasons. It's a communication between extern machines and the
> SoC via
> > USB / PCIe,  like pushing boot stream, console and network mgmt. Some of
> the features,
> > like pushing boot stream, doesn't communicate with the ARM core. The
> boot stream
> > is pushed to the SoC HW logic directly. I'll add the host-side virtio-based
> driver in patch v5.
> 
> Right, the drivers/mailbox subsystem was not the right idea here,
> I noticed that myself after actually reading the driver. Drivers/soc
> may also not be the best fit, since this is not really about it being
> a SoC, but rather a way to encapsulate virtual devices. The
> mic driver I mentioned is in drivers/misc, but I don't like to add stuff
> there if we can avoid it.
> 
> drivers/virtio, drivers/bus or drivers/mfd might also be an option that
> could fit better than drivers/soc, or you could have your own subdir
> below drivers/ as some others do. Finally, drivers/platform/mellanox
> might be a reasonable choice, and it would let you keep both sides
> of the driver in one place.

We actually have more drivers coming for this SoC, such for I2C, GPIO, PKA, 
performance counter, L3 cache profile, etc, which could be found at the link below
https://git.launchpad.net/~dcwoods/ubuntu/+source/linux/+git/cosmic/log/?h=mellanox_bluefield

Since it's not just the FIFO driver, any suggestion which one would be better, 
the "soc" or still the 'platform' ? Thanks!

> 
>        Arnd

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v5 1/5] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (6 preceding siblings ...)
  2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-10-31 18:09 ` Liming Sun
  2018-10-31 18:09   ` [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option Liming Sun
                     ` (2 more replies)
  2018-11-01 16:23 ` [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (11 subsequent siblings)
  19 siblings, 3 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Return the available Tx buffer space. */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option
  2018-10-31 18:09 ` [PATCH v5 1/5] " Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  2018-10-31 18:09   ` [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
  2018-10-31 18:09   ` [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac3..aeb67c2 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -110,6 +110,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index f9a186f..508cb9d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -43,6 +43,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-31 18:09 ` [PATCH v5 1/5] " Liming Sun
  2018-10-31 18:09   ` [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  2018-10-31 18:09   ` [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-10-31 18:09 ` [PATCH v5 1/5] " Liming Sun
  2018-10-31 18:09   ` [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option Liming Sun
  2018-10-31 18:09   ` [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-10-31 18:09   ` Liming Sun
  2 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-10-31 18:09 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c78feb0..07f7c7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1649,6 +1649,14 @@ L:	linux-mediatek at lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	drivers/phy/mediatek/phy-mtk-tphy.c
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (7 preceding siblings ...)
  2018-10-31 18:09 ` [PATCH v5 1/5] " Liming Sun
@ 2018-11-01 16:23 ` Liming Sun
  2018-12-12 23:07   ` Matthias Brugger
  2018-11-01 16:25 ` Liming Sun
                   ` (10 subsequent siblings)
  19 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:23 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Return the available Tx buffer space. */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (8 preceding siblings ...)
  2018-11-01 16:23 ` [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-11-01 16:25 ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
                     ` (7 more replies)
  2019-01-03 19:17 ` [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (9 subsequent siblings)
  19 siblings, 8 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1337 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 113e884..93052d0 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..d5e3550
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	int irq[TM_IRQ_CNT];		/* irq numbers */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Return the available Tx buffer space. */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq[i];
+		if (irq) {
+			fifo->irq[i] = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
+{
+	int i = (uintptr_t)dev_id % sizeof(void *);
+	struct tmfifo *fifo = dev_id - i;
+
+	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
+		schedule_work(&fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_TX_LWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq[TM_RX_HWM_IRQ]) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq[i] = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
+				  "tmfifo", (u8 *)fifo + i);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq[i] = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option
  2018-11-01 16:25 ` Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 393d2b5..1e28eb6 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -117,6 +117,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index db8d364..8865ada 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-11-01 16:25 ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo at 800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-11-01 16:25 ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
  2018-11-01 16:25   ` [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
                     ` (4 subsequent siblings)
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 42a2f31..44fb693 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1694,6 +1694,14 @@ S:	Maintained
 F:	drivers/phy/mediatek/
 F:	Documentation/devicetree/bindings/phy/phy-mtk-*
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2018-11-01 16:25 ` Liming Sun
                     ` (2 preceding siblings ...)
  2018-11-01 16:25   ` [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2019-01-18 16:02     ` Arnd Bergmann
  2018-11-01 16:25   ` [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC via an
interface called Rshim. The Rshim driver provides boot, console,
and networking services over this interface. This commit is
the common driver where the other backend (transport) driver will
use.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig           |    8 +
 drivers/soc/mellanox/Makefile          |    1 +
 drivers/soc/mellanox/host/Makefile     |    2 +
 drivers/soc/mellanox/host/rshim.c      | 2673 ++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h      |  361 +++++
 drivers/soc/mellanox/host/rshim_regs.h |  152 ++
 6 files changed, 3197 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellnox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..e47842f
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_common.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * Convert it to little endian before sending to RShim. The
+		 * other side should decode it as little endian as well which
+		 * is usually the default case.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;
+			mutex_lock(&bd->mutex);
+		}
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		char *buf = bd->boot_buf[whichbuf];
+
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have seems to have some sort of bug
+	     * which makes it return more bytes than we actually wrote!  In
+	     * that case we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+		bd->is_boot_open = 0;
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	while (devs_locked) {
+		int i = __builtin_ctz(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Signal an error on the FIFO, and wake up anyone who might need to know
+ * about it.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int i;
+
+	bd->tmfifo_error = err;
+	wake_up_interruptible_all(&bd->write_completed);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		wake_up_interruptible_all(&bd->read_fifo[i].operable);
+		wake_up_interruptible_all(&bd->write_fifo[i].operable);
+	}
+}
+
+/* Drain the read buffer, and start another read/interrupt if needed. */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with RCU lock. See comments in
+			 * rshim_register_service() / rshim_register_service()
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				return to_user ? -EINTR : -ERESTARTSYS;
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= bd->read_fifo[i].data == 0;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= bd->write_fifo[i].data == 0;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int i;
+
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+	bd->rx_chan = bd->tx_chan = 0;
+
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_WRITING |
+			    RSH_SFLG_READING);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int i;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		kfree(bd->read_fifo[i].data);
+		bd->read_fifo[i].data = NULL;
+		kfree(bd->write_fifo[i].data);
+		bd->write_fifo[i].data = NULL;
+	}
+
+	rshim_fifo_reset(bd);
+
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+
+	if (!read_empty(bd, chan))
+		retval |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		retval |= POLLOUT | POLLWRNORM;
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		retval |= POLLERR;
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, retval);
+
+	return retval;
+}
+
+
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_NET_CHAN);
+}
+
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+}
+
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_NET_CHAN);
+}
+
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_tm_open) {
+		pr_debug("tmfifo_open: file already open\n");
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	bd->is_tm_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	/* Call the drainer to do an initial read, if needed. */
+	rshim_fifo_input(bd);
+
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Console file operations routines */
+
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_CONS_CHAN);
+}
+
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+}
+
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_CONS_CHAN);
+}
+
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * The console is already open.  This is OK, but it means
+		 * there's no work to do other than updating the reference
+		 * count.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	bd->is_cons_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+
+	spin_unlock_irq(&bd->spinlock);
+
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		};
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->read_rshim(bd,
+				(*ppos >> 16) & 0xF, /* channel # */
+				*ppos & 0xFFFF,	 /* addr */
+				&buf);
+	mutex_unlock(&bd->mutex);
+
+	/* If the read was successful, copy the data to userspace */
+	if (!retval && copy_to_user(user_buffer, &buf, count))
+		return -EFAULT;
+
+	return retval ? retval : count;
+}
+
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(&buf, user_buffer, count))
+		return -EFAULT;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->write_rshim(bd,
+				 (*ppos >> 16) & 0xF, /* channel # */
+				 *ppos & 0xFFFF, /* addr */
+				 buf);
+	mutex_unlock(&bd->mutex);
+
+	return retval ? retval : count;
+}
+
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	int retval;
+	u64 value;
+
+	/* Boot mode. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				&value);
+	if (retval) {
+		pr_err("couldn't read rshim register\n");
+		return retval;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+
+	if (sscanf(buf, "%s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_STS, hdr.data);
+		if (retval < 0)
+			break;
+	}
+
+	return 0;
+}
+
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+static int rshim_find_index(char *dev_name)
+{
+	int i, dev_index = -1;
+
+	/* First look for a match with a previous device name. */
+	for (i = 0; i < rshim_nr_devs; i++)
+		if (rshim_dev_names[i] &&
+		    !strcmp(dev_name, rshim_dev_names[i])) {
+			pr_debug("found match with previous@index %d\n", i);
+			dev_index = i;
+			break;
+		}
+
+	/* Then look for a never-used slot. */
+	if (dev_index < 0) {
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (!rshim_dev_names[i]) {
+				pr_debug("found never-used slot %d\n", i);
+				dev_index = i;
+				break;
+			}
+	}
+
+	/* Finally look for a currently-unused slot. */
+	if (dev_index < 0) {
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (!rshim_devs[i]) {
+				pr_debug("found unused slot %d\n", i);
+				dev_index = i;
+				break;
+			}
+	}
+
+	return dev_index;
+}
+
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int dev_index = rshim_find_index(dev_name);
+
+	/* If none of that worked, we fail. */
+	if (dev_index < 0) {
+		pr_err("couldn't find slot for new device %s\n", dev_name);
+		return NULL;
+	}
+
+	return rshim_devs[dev_index];
+}
+EXPORT_SYMBOL(rshim_find);
+
+/* House-keeping timer. */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	if (bd == NULL)
+		return -ENODEV;
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			rshim_dev_names[bd->dev_index]);
+}
+
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/* Check whether backend is allowed to register or not. */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	int i, retval;
+	u64 value;
+
+	/* Write value 0 to RSH_SCRATCHPAD1. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (retval < 0)
+		return -ENODEV;
+
+	/*
+	 * Poll RSH_SCRATCHPAD1 up to one second to check whether it's reset to
+	 * the keepalive magic value, which indicates another backend driver has
+	 * already attached to this target.
+	 */
+	for (i = 0; i < 10; i++) {
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+					&value);
+		if (retval < 0)
+			return -ENODEV;
+
+		if (value == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	bd->dev_index = dev_index;
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+		} else {
+			pr_debug("added class dev %s\n",
+				 format_dev_t(devbuf, rshim_dev_base +
+					      bd->dev_index *
+					      RSH_DEV_TYPES + i));
+		}
+
+		dev_set_drvdata(cl_dev, bd);
+		if (device_create_file(cl_dev, &dev_attr_rshim_path))
+			pr_err("could not create rshim_path file in sysfs\n");
+	}
+
+	for (i = 0; i < 2; i++) {
+		bd->boot_buf[i] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+		if (!bd->boot_buf[i]) {
+			if (i == 1) {
+				kfree(bd->boot_buf[0]);
+				bd->boot_buf[0] = NULL;
+			}
+		}
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the timer. */
+	del_timer_sync(&bd->timer);
+
+	for (i = 0; i < 2; i++)
+		kfree(bd->boot_buf[i]);
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class,
+			       rshim_dev_base + bd->dev_index *
+			       RSH_DEV_TYPES + i);
+	}
+
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add memory barrir to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_lock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	rshim_dev_names = kzalloc(rshim_nr_devs *
+				    sizeof(rshim_dev_names[0]), GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types. */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX
+};
+
+/* TMFIFO message header. */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/* FIFO structure. */
+struct rshim_fifo {
+	unsigned char *data;
+	unsigned int head;
+	unsigned int tail;
+	wait_queue_head_t operable;
+};
+
+/* RShim backend. */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Pointer to the net device. */
+	void *net;
+
+	/* House-keeping Timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 */
+	struct kref kref;
+
+	/* State flags. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+#ifdef RSH_RESET_MUTEX
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/* RShim service. */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/* Reference number. */
+	atomic_t ref;
+
+	/* Create service. */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global array to store RShim devices and names. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..b14df716
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#define _64bit(x) x ## ULL
+#endif /* __ASSEMBLER */
+
+#include <linux/types.h>
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__RSHIM_REGS_H__) */
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim
  2018-11-01 16:25 ` Liming Sun
                     ` (3 preceding siblings ...)
  2018-11-01 16:25   ` [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds networking support over the Rshim interface of
the BlueField SoC. It communicates with the target (ARM) side via
the Rshim TmFifo.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |   2 +-
 drivers/soc/mellanox/host/rshim_net.c | 834 ++++++++++++++++++++++++++++++++++
 2 files changed, 835 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index e47842f..1a282b9 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o
+obj-m := rshim.o rshim_net.o
 
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/* Flag to drain the current pending packet. */
+static bool rshim_net_draining_mode;
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the ring state. */
+struct rshim_net_vring {
+	void *va;			/* virtual address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types. */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;
+	struct rshim_backend *bd;		/* backend */
+	u8 status;
+	u16 virtio_registered : 1;
+	u64 features;
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/* Allocate vrings for the net device. */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the net device. */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i, size;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			kfree(vring->va);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+
+	/* Tx. */
+	if (test_and_clear_bit(RSH_NET_TX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_TX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, false);
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(RSH_NET_RX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_RX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, true);
+	}
+
+	/* Keepalive check. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/* Nothing to do for now. */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/* House-keeping timer. */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	idx = vr->used->idx % vring->size;
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/* Update the link activity. */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	if (activity) {
+		/* Bring up the link. */
+		if (!(net->config.status & VIRTIO_NET_S_LINK_UP)) {
+			net->config.status |= VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+		}
+	} else {
+		/* Bring down the link. */
+		if (net->config.status & VIRTIO_NET_S_LINK_UP) {
+			int i;
+
+			net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+
+			/* Reset the ring state. */
+			for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+				net->vrings[i].pkt_len =
+						sizeof(struct virtio_net_hdr);
+				net->vrings[i].cur_len = 0;
+				net->vrings[i].rem_len = 0;
+			}
+		}
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header. */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+
+	/*
+	 * Virtio-net maintains vrings in pairs. Odd number ring for Rx
+	 * and even number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX bit. */
+		if (!test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	} else {
+		/* Set the TX bit. */
+		if (!test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	}
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Get the array of feature bits for this device. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->features;
+}
+
+/* Confirm device features to use. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->status;
+}
+
+/* Write the status byte. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&net->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/* Remove. */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (net) {
+		/* Stop the timer. */
+		del_timer_sync(&net->timer);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&net->work);
+
+		/* Unregister virtio. */
+		if (net->virtio_registered)
+			unregister_virtio_device(&net->vdev);
+
+		/* Free vring. */
+		rshim_net_free_vrings(net);
+
+		kfree(net);
+	}
+
+	return 0;
+}
+
+/* Rx ready. */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (net) {
+		test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+		schedule_work(&net->work);
+	}
+}
+
+/* Remove. */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	int ret = 0;
+
+	if (bd->net) {
+		ret = rshim_net_delete_dev((struct rshim_net *)bd->net);
+		bd->net = NULL;
+	}
+
+	return ret;
+}
+
+/* Init. */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(struct rshim_net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	timer_setup(&net->timer, rshim_net_timer, 0);
+	net->timer.function = rshim_net_timer;
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+	if (rshim_net_alloc_vrings(net))
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	return rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver
  2018-11-01 16:25 ` Liming Sun
                     ` (4 preceding siblings ...)
  2018-11-01 16:25   ` [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
  2018-11-01 16:25   ` [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the USB backend driver to access the Rshim
interface on the BlueField SoC. It can be used when a USB cable
is connected to the Smart NIC or standalone device.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |    2 +-
 drivers/soc/mellanox/host/rshim_usb.c | 1035 +++++++++++++++++++++++++++++++++
 2 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index 1a282b9..c6703cd 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o
+obj-m := rshim.o rshim_net.o rshim_usb.o
 
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg at kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs. */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/* Structure to hold all of our device specific stuff. */
+struct rshim_usb {
+	/* RShim backend structure. */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/* Control data. */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode. */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/* Table of devices that work with this driver */
+static struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/* Arguments to an urb completion handler. */
+#define URB_COMP_ARGS struct urb *urb
+
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_usb *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_usb, bd);
+
+	rshim_deregister(bd);
+	kfree(dev);
+}
+
+/* Rshim read/write routines */
+
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read and endian conversion. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	/*
+	 * The RShim HW puts bytes on the wire in little-endian order
+	 * regardless of endianness settings either in the host or the ARM
+	 * cores.
+	 */
+	*result = le64_to_cpu(dev->ctrl_data);
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_OUT,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Boot routines */
+
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT)
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	else if (urb->status)
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause stuck.
+	 * Here we release the mutex so user could use 'ctrl + c' to terminate
+	 * the current write. Once the boot file is opened again, the
+	 * outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_drain: failed submitting read "
+			      "urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted read urb\n");
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read_callback: failed submitting "
+			      "interrupt urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted interrupt urb\n");
+	}
+}
+
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/* These make the endpoint test code in rshim_usb_probe() a lot cleaner. */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+#define max_pkt(ep)    le16_to_cpu(ep->wMaxPacketSize)
+#define ep_addr(ep)    (ep->bEndpointAddress)
+
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		rshim_usb_fifo_read(dev, buf, count);
+		return 0;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		return rshim_usb_fifo_write(dev, buf, count);
+
+	case RSH_DEV_TYPE_BOOT:
+		return rshim_usb_boot_write(dev, buf, count);
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (is_write)
+			usb_kill_urb(dev->write_urb);
+		else
+			usb_kill_urb(dev->read_or_intr_urb);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		usb_kill_urb(dev->boot_urb);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		break;
+	}
+}
+
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		bd = &dev->bd;
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	allocfail |= bd->read_buf == 0;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= dev->intr_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->read_or_intr_urb == 0;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->write_urb == 0;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	if (dev) {
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	kfree(usb_dev_name);
+	return retval;
+}
+
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;
+
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	if (!bd->has_rshim && !bd->has_tm) {
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+static int __init rshim_usb_init(void)
+{
+	int result;
+
+	/* Register this driver with the USB subsystem. */
+	result = usb_register(&rshim_usb_driver);
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_usb_exit(void)
+{
+	/* Deregister this driver with the USB subsystem. */
+	usb_deregister(&rshim_usb_driver);
+}
+
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe backend driver
  2018-11-01 16:25 ` Liming Sun
                     ` (5 preceding siblings ...)
  2018-11-01 16:25   ` [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  2018-11-01 16:25   ` [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the PCIe backend driver to access the Rshim
interface on the BlueField SoC, such as on the Smart NIC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile     |   2 +-
 drivers/soc/mellanox/host/rshim_pcie.c | 478 +++++++++++++++++++++++++++++++++
 2 files changed, 479 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index c6703cd..fa4b21c 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR2 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read/write methods for 32-bit systems
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read lower 32-bits of data */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read upper 32-bits of data */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write lower 32 bits of data to TRIO_CR_GW_DATA */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to TRIO_CR_GW_DATA */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/* Probe routine */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL) {
+		err = -ENOMEM;
+		goto error;
+	}
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf) {
+		bd->read_buf = kzalloc(READ_BUF_SIZE,
+					   GFP_KERNEL);
+	}
+	allocfail |= bd->read_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE,
+					    GFP_KERNEL);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto rshim_map_failed;
+		} else
+			pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval)
+		goto rshim_map_failed;
+
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	rshim_lock();
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish backend driver
  2018-11-01 16:25 ` Liming Sun
                     ` (6 preceding siblings ...)
  2018-11-01 16:25   ` [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
@ 2018-11-01 16:25   ` Liming Sun
  7 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-11-01 16:25 UTC (permalink / raw)
  To: linux-arm-kernel

This commit adds the PCIe live-fish backend driver to access the
Rshim interface on the BlueField SoC, such as on the Smart NIC.
It is slow access and can be used for live-fish mode when the NIC
firmware hasn't been programmed yet.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile        |   2 +-
 drivers/soc/mellanox/host/rshim_pcie_lf.c | 695 ++++++++++++++++++++++++++++++
 2 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index fa4b21c..79a1c86 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Mellanox Address & Data Capabilities */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/* Mechanism to access the CR space using hidden PCI capabilities */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Set LSB to indicate a read operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset | MELLANOX_CAP_READ);
+	if (retval)
+		return retval;
+
+	/* Read result from MELLANOX_DATA */
+	retval = pci_read_config_dword(pci_dev, MELLANOX_DATA,
+				result);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int retval;
+
+	/* Write data to MELLANOX_DATA */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_DATA,
+				value);
+	if (retval)
+		return retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Leave LSB clear to indicate a write operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Acquire and release the TRIO_CR_GW_LOCK. */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	/* Wait until TRIO_CR_GW_LOCK is free */
+	do {
+		retval = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK,
+				&read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_ACQUIRED);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	int retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_RELEASE);
+
+	return retval;
+}
+
+/*
+ * Mechanism to access the RShim from the CR space using the
+ * TRIO_CR_GATEWAY.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_READ_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_WRITE_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & (RSH_CHANNEL1_BASE + RSH_BYTE_ACC_PENDING));
+
+	return 0;
+}
+
+/*
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read lower 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read upper 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write lower 32 bits of data to TRIO_CR_GW_DATA */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to TRIO_CR_GW_DATA */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Write lower 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	addr = be32_to_cpu(addr);
+
+	retval = rshim_byte_acc_read(pci_dev, addr, result);
+
+	return retval;
+}
+
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/* Probe routine */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		pr_err("Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf) {
+		bd->read_buf = kzalloc(READ_BUF_SIZE,
+					   GFP_KERNEL);
+	}
+	allocfail |= bd->read_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE,
+					    GFP_KERNEL);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			rshim_unlock();
+			goto register_failed;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval)
+		goto register_failed;
+
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* RE: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-10-25 15:57   ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Arnd Bergmann
  2018-10-26 18:24     ` Liming Sun
@ 2018-12-04 22:12     ` Liming Sun
  1 sibling, 0 replies; 70+ messages in thread
From: Liming Sun @ 2018-12-04 22:12 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: devicetree, David Woods, arm-soc, Olof Johansson, Robin Murphy,
	linux-arm-kernel

Just an update that I have uploaded new patch series v6, which includes the other half of the driver that runs on the external USB host machine, and also tries to resolve the previous comments.

The v6 patches could also be found at
https://patchwork.kernel.org/project/linux-arm-kernel/list/?submitter=176699

Thanks!

-----Original Message-----
From: arndbergmann@gmail.com <arndbergmann@gmail.com> On Behalf Of Arnd Bergmann
Sent: Thursday, October 25, 2018 11:58 AM
To: Liming Sun <lsun@mellanox.com>
Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc <arm@kernel.org>; devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc

On 10/24/18, Liming Sun <lsun@mellanox.com> wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to 
> exchange data with the SoC via USB or PCIe. The driver is based on 
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

I definitely like the idea of using virtio-net and virtio-console here, this is a great way of reusing the existing high-level drivers, and i similar in concept (but also much simpler) to what we have in drivers/misc/mic/ for another Linux-running machine that can be a PCIe add-on card.

Have you also posted the other half of this driver? I'd like to see how it all fits together.

A few style comments:

> +
> +#define TMFIFO_GET_FIELD(reg, mask)	FIELD_GET(mask, reg)
> +
> +#define TMFIFO_SET_FIELD(reg, mask, value) \
> +	((reg & ~mask) | FIELD_PREP(mask, value))

I think it would be nicer to use FIELD_GET/FIELD_PREP in the code directly, and avoid adding extra wrappers around them.

> +/* Vring size. */
> +#define TMFIFO_VRING_SIZE			1024
> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* Use a timer for house-keeping. */
> +static int tmfifo_timer_interval = HZ / 10;
> +
> +/* Global lock. */
> +static struct mutex tmfifo_lock;

Maybe use 'static DEFINE_MUTEX(tmfifo_lock) here and remove the initialization call.

> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE; 
> +module_param(tmfifo_vring_size, int, 0444); 
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring.");
> +
> +struct tmfifo;
> +
> +/* A flag to indicate TmFifo ready. */ static bool tmfifo_ready;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Spin lock. */
> +static DEFINE_SPINLOCK(tmfifo_spin_lock);

Generally speaking, it's nicer to write a driver in a way that avoids global variables and make the flags and locks all members of a device specific structure.

> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};

I suppose you did this to keep the driver simple, but it seems a little inflexible to only support two specific device types. Wouldn't we also want e.g. 9pfs or virtio_blk in some configurations?

> +
> +#define TMFIFO_VDEV_TX_BUF_AVAIL(vdev) \
> +	(((vdev)->tx_tail >= (vdev)->tx_head) ? \
> +	(TMFIFO_CONS_TX_BUF_SIZE - 8 - ((vdev)->tx_tail - (vdev)->tx_head)) : \
> +	((vdev)->tx_head - (vdev)->tx_tail - 8))
> +
> +#define TMFIFO_VDEV_TX_BUF_PUSH(vdev, len) do { \
> +	(vdev)->tx_tail += (len); \
> +	if ((vdev)->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE; \ } while (0)
> +
> +#define TMFIFO_VDEV_TX_BUF_POP(vdev, len) do { \
> +	(vdev)->tx_head += (len); \
> +	if ((vdev)->tx_head >= TMFIFO_CONS_TX_BUF_SIZE) \
> +		(vdev)->tx_head -= TMFIFO_CONS_TX_BUF_SIZE; \ } while (0)

It would be nicer to turn these into inline functions rather than macros.

> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;
> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	int irq[TM_IRQ_CNT];		/* irq numbers */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if
> configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF,
> 0x01};
> +

Is a predefined MAC address better than a random one here?

For DT based systems, we tend to also call of_get_mac_address() in order to allow setting a unique address from firmware.

> +/* Forward declaration. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx); 
> +static void tmfifo_release_pkt(struct virtio_device *vdev,
> +			       struct tmfifo_vring *vring,
> +			       struct vring_desc **desc);

Try to avoid forward declarations by reordering the functions according to how they get called.

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id) {
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}

Maybe using a request_threaded_irq() would be a better way to defer the handler into IRQ context.

        Arnd
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-11-01 16:23 ` [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2018-12-12 23:07   ` Matthias Brugger
  2019-01-03 19:20     ` Liming Sun
  0 siblings, 1 reply; 70+ messages in thread
From: Matthias Brugger @ 2018-12-12 23:07 UTC (permalink / raw)
  To: Liming Sun, y, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, mbrugger@suse.com >> Matthias Brugger,
	linux-arm-kernel



On 01/11/2018 17:23, Liming Sun wrote:> This commit adds the TmFifo driver for
Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/soc/Kconfig                |    1 +
>  drivers/soc/Makefile               |    1 +
>  drivers/soc/mellanox/Kconfig       |   18 +
>  drivers/soc/mellanox/Makefile      |    5 +
>  drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
>  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
>  6 files changed, 1337 insertions(+)
>  create mode 100644 drivers/soc/mellanox/Kconfig
>  create mode 100644 drivers/soc/mellanox/Makefile
>  create mode 100644 drivers/soc/mellanox/tmfifo.c
>  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
>

[...]

> +
> +/* Interrupt handler. */
> +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> +{
> +	int i = (uintptr_t)dev_id % sizeof(void *);
> +	struct tmfifo *fifo = dev_id - i;
> +
> +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> +		schedule_work(&fifo->work);
> +
> +	return IRQ_HANDLED;
> +}
> +
[...]
> +
> +/* Probe the TMFIFO. */
> +static int tmfifo_probe(struct platform_device *pdev)
> +{
> +	u64 ctl;
> +	struct tmfifo *fifo;
> +	struct resource *rx_res, *tx_res;
> +	struct virtio_net_config net_config;
> +	int i, ret;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> +	fifo->timer.function = tmfifo_timer;
> +
> +	for (i = 0; i < TM_IRQ_CNT; i++) {
> +		fifo->irq[i] = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
> +				  "tmfifo", (u8 *)fifo + i);

I think it would be better if you create a struct that passes a pointer to fifo
and the ID instead of "hiding" the ID inside the address.

Regards,
Matthias

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (9 preceding siblings ...)
  2018-11-01 16:25 ` Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-03-15 13:18   ` Matthias Brugger
  2019-01-03 19:17 ` [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
                   ` (8 subsequent siblings)
  19 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the TmFifo driver for Mellanox BlueField Soc.
TmFifo is a shared FIFO which enables external host machine to
exchange data with the SoC via USB or PCIe. The driver is based on
virtio framework and has console and network access enabled.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/Kconfig                |    1 +
 drivers/soc/Makefile               |    1 +
 drivers/soc/mellanox/Kconfig       |   18 +
 drivers/soc/mellanox/Makefile      |    5 +
 drivers/soc/mellanox/tmfifo.c      | 1244 ++++++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/tmfifo_regs.h |   76 +++
 6 files changed, 1345 insertions(+)
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index c07b4a8..fa87dc8 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -7,6 +7,7 @@ source "drivers/soc/bcm/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
+source "drivers/soc/mellanox/Kconfig"
 source "drivers/soc/qcom/Kconfig"
 source "drivers/soc/renesas/Kconfig"
 source "drivers/soc/rockchip/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 446166b..d14555b 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_ARCH_GEMINI)	+= gemini/
 obj-$(CONFIG_ARCH_MXC)		+= imx/
 obj-$(CONFIG_SOC_XWAY)		+= lantiq/
 obj-y				+= mediatek/
+obj-$(CONFIG_SOC_MLNX)		+= mellanox/
 obj-$(CONFIG_ARCH_MESON)	+= amlogic/
 obj-y				+= qcom/
 obj-y				+= renesas/
diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
new file mode 100644
index 0000000..d88efa1
--- /dev/null
+++ b/drivers/soc/mellanox/Kconfig
@@ -0,0 +1,18 @@
+menuconfig SOC_MLNX
+	bool "Mellanox SoC drivers"
+	default y if ARCH_MLNX_BLUEFIELD
+
+if ARCH_MLNX_BLUEFIELD || COMPILE_TEST
+
+config MLNX_BLUEFIELD_TMFIFO
+	tristate "Mellanox BlueField SoC TmFifo driver"
+	depends on ARM64
+	default m
+	select VIRTIO_CONSOLE
+	select VIRTIO_NET
+	help
+	  Say y here to enable TmFifo support. The TmFifo driver provides the
+	  virtio driver framework for the TMFIFO of Mellanox BlueField SoC and
+	  the implementation of a console and network driver.
+
+endif # ARCH_MLNX_BLUEFIELD
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
new file mode 100644
index 0000000..c44c0e2
--- /dev/null
+++ b/drivers/soc/mellanox/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Mellanox SoC drivers.
+#
+obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
new file mode 100644
index 0000000..2975229
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo.c
@@ -0,0 +1,1244 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cache.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <asm/byteorder.h>
+
+#include "tmfifo_regs.h"
+
+/* Vring size. */
+#define TMFIFO_VRING_SIZE			1024
+
+/* Console Tx buffer size. */
+#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
+
+/* House-keeping timer interval. */
+static int tmfifo_timer_interval = HZ / 10;
+module_param(tmfifo_timer_interval, int, 0644);
+MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
+
+/* Global lock. */
+static DEFINE_MUTEX(tmfifo_lock);
+
+/* Virtio ring size. */
+static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
+module_param(tmfifo_vring_size, int, 0444);
+MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
+
+/* Struct declaration. */
+struct tmfifo;
+
+/* Virtual devices sharing the TM FIFO. */
+#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
+
+/* Structure to maintain the ring state. */
+struct tmfifo_vring {
+	void *va;			/* virtual address */
+	dma_addr_t dma;			/* dma address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	int vdev_id;			/* TMFIFO_VDEV_xxx */
+	u32 pkt_len;			/* packet total length */
+	__virtio16 next_avail;		/* next avail desc id */
+	struct tmfifo *fifo;		/* pointer back to the tmfifo */
+};
+
+/* Interrupt types. */
+enum {
+	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
+	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
+	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
+	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
+	TM_IRQ_CNT
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	TMFIFO_VRING_RX,		/* Rx ring */
+	TMFIFO_VRING_TX,		/* Tx ring */
+	TMFIFO_VRING_NUM
+};
+
+struct tmfifo_vdev {
+	struct virtio_device vdev;	/* virtual device */
+	u8 status;
+	u64 features;
+	union {				/* virtio config space */
+		struct virtio_console_config cons;
+		struct virtio_net_config net;
+	} config;
+	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
+	u8 *tx_buf;			/* tx buffer */
+	u32 tx_head;			/* tx buffer head */
+	u32 tx_tail;			/* tx buffer tail */
+};
+
+struct tmfifo_irq_info {
+	struct tmfifo *fifo;		/* tmfifo structure */
+	int irq;			/* interrupt number */
+	int index;			/* array index */
+};
+
+/* TMFIFO device structure */
+struct tmfifo {
+	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
+	struct platform_device *pdev;	/* platform device */
+	struct mutex lock;
+	void __iomem *rx_base;		/* mapped register base */
+	void __iomem *tx_base;		/* mapped register base */
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct tmfifo_irq_info irq_info[TM_IRQ_CNT];	/* irq info */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
+	bool is_ready;			/* ready flag */
+	spinlock_t spin_lock;		/* spin lock */
+};
+
+union tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
+
+/* MTU setting of the virtio-net interface. */
+#define TMFIFO_NET_MTU		1500
+
+/* Supported virtio-net features. */
+#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
+				 (1UL << VIRTIO_NET_F_STATUS) | \
+				 (1UL << VIRTIO_NET_F_MAC))
+
+/* Return the available Tx buffer space. */
+static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
+{
+	return ((vdev->tx_tail >= vdev->tx_head) ?
+		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
+		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));
+}
+
+/* Update Tx buffer pointer after pushing data. */
+static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_tail += len;
+	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Update Tx buffer pointer after popping data. */
+static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
+{
+	vdev->tx_head += len;
+	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
+		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
+}
+
+/* Allocate vrings for the fifo. */
+static int tmfifo_alloc_vrings(struct tmfifo *fifo,
+			       struct tmfifo_vdev *tm_vdev, int vdev_id)
+{
+	dma_addr_t dma;
+	void *va;
+	int i, size;
+	struct tmfifo_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+		vring->fifo = fifo;
+		vring->size = tmfifo_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+		vring->vdev_id = vdev_id;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = dma_alloc_coherent(tm_vdev->vdev.dev.parent, size, &dma,
+					GFP_KERNEL);
+		if (!va) {
+			dev_err(tm_vdev->vdev.dev.parent,
+				"vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+		vring->dma = dma;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the fifo device. */
+static void tmfifo_free_vrings(struct tmfifo *fifo, int vdev_id)
+{
+	int i, size;
+	struct tmfifo_vring *vring;
+	struct tmfifo_vdev *tm_vdev = fifo->vdev[vdev_id];
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+					  vring->va, vring->dma);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Free interrupts of the fifo device. */
+static void tmfifo_free_irqs(struct tmfifo *fifo)
+{
+	int i, irq;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		irq = fifo->irq_info[i].irq;
+		if (irq) {
+			fifo->irq_info[i].irq = 0;
+			disable_irq(irq);
+			free_irq(irq, (u8 *)fifo + i);
+		}
+	}
+}
+
+/* Interrupt handler. */
+static irqreturn_t tmfifo_irq_handler(int irq, void *arg)
+{
+	struct tmfifo_irq_info *irq_info = (struct tmfifo_irq_info *)arg;
+
+	if (irq_info->index < TM_IRQ_CNT &&
+	    !test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+		schedule_work(&irq_info->fifo->work);
+
+	return IRQ_HANDLED;
+}
+
+/* Nothing to do for now. */
+static void tmfifo_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+tmfifo_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+
+	if (!vr || vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vr->num;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vr->num);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+static inline void tmfifo_virtio_release_desc(struct virtio_device *vdev,
+					      struct vring *vr,
+					      struct vring_desc *desc, u32 len)
+{
+	unsigned int idx;
+
+	idx = vr->used->idx % vr->num;
+	vr->used->ring[idx].id = desc - vr->desc;
+	vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+	/* Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 tmfifo_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+static void tmfifo_release_pkt(struct virtio_device *vdev,
+			       struct tmfifo_vring *vring,
+			       struct vring_desc **desc)
+{
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+	struct vring_desc *desc_head;
+	uint32_t pkt_len = 0;
+
+	if (!vr)
+		return;
+
+	if (desc != NULL && *desc != NULL && vring->desc_head != NULL) {
+		desc_head = vring->desc_head;
+		pkt_len = vring->pkt_len;
+	} else {
+		desc_head = tmfifo_virtio_get_next_desc(vring->vq);
+		if (desc_head != NULL) {
+			pkt_len = tmfifo_virtio_get_pkt_len(vdev,
+							desc_head, vr);
+		}
+	}
+
+	if (desc_head != NULL)
+		tmfifo_virtio_release_desc(vdev, vr, desc_head, pkt_len);
+
+	if (desc != NULL)
+		*desc = NULL;
+	vring->pkt_len = 0;
+}
+
+/* House-keeping timer. */
+static void tmfifo_timer(struct timer_list *arg)
+{
+	struct tmfifo *fifo = container_of(arg, struct tmfifo, timer);
+
+	/*
+	 * Wake up the work handler to poll the Rx FIFO in case interrupt
+	 * missing or any leftover bytes stuck in the FIFO.
+	 */
+	test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events);
+
+	schedule_work(&fifo->work);
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+}
+
+/* Buffer the console output. */
+static void tmfifo_console_output(struct tmfifo_vdev *cons,
+				  struct virtqueue *vq)
+{
+	u32 len, pkt_len, idx;
+	struct vring_desc *head_desc, *desc = NULL;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &cons->vdev;
+	void *addr;
+	union tmfifo_msg_hdr *hdr;
+
+	for (;;) {
+		head_desc = tmfifo_virtio_get_next_desc(vq);
+		if (head_desc == NULL)
+			break;
+
+		/* Release the packet if no more space. */
+		pkt_len = tmfifo_virtio_get_pkt_len(vdev, head_desc, vr);
+		if (pkt_len + sizeof(*hdr) > tmfifo_vdev_tx_buf_avail(cons)) {
+			tmfifo_virtio_release_desc(vdev, vr, head_desc,
+						   pkt_len);
+			break;
+		}
+
+		hdr = (union tmfifo_msg_hdr *)&cons->tx_buf[cons->tx_tail];
+		hdr->data = 0;
+		hdr->type = VIRTIO_ID_CONSOLE;
+		hdr->len = htons(pkt_len);
+
+		tmfifo_vdev_tx_buf_push(cons, sizeof(*hdr));
+		desc = head_desc;
+
+		while (desc != NULL) {
+			addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+			len = virtio32_to_cpu(vdev, desc->len);
+
+			if (len <= TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail) {
+				memcpy(cons->tx_buf + cons->tx_tail, addr, len);
+			} else {
+				u32 seg;
+
+				seg = TMFIFO_CONS_TX_BUF_SIZE - cons->tx_tail;
+				memcpy(cons->tx_buf + cons->tx_tail, addr, seg);
+				addr += seg;
+				memcpy(cons->tx_buf, addr, len - seg);
+			}
+			tmfifo_vdev_tx_buf_push(cons, len);
+
+			if (!(virtio16_to_cpu(vdev, desc->flags) &
+			    VRING_DESC_F_NEXT))
+				break;
+			idx = virtio16_to_cpu(vdev, desc->next);
+			desc = &vr->desc[idx];
+		}
+
+		/* Make each packet 8-byte aligned. */
+		tmfifo_vdev_tx_buf_push(cons, ((pkt_len + 7) & -8) - pkt_len);
+
+		tmfifo_virtio_release_desc(vdev, vr, head_desc, pkt_len);
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct tmfifo_vring *vring;
+	struct tmfifo *fifo;
+	struct vring *vr;
+	struct virtio_device *vdev;
+	u64 sts, data;
+	int num_avail = 0, hdr_len, tx_reserve;
+	void *addr;
+	u32 len, idx;
+	struct vring_desc *desc;
+	unsigned long flags;
+	struct tmfifo_vdev *cons;
+
+	if (!vq)
+		return;
+
+	vring = (struct tmfifo_vring *)vq->priv;
+	fifo = vring->fifo;
+	vr = (struct vring *)virtqueue_get_vring(vq);
+
+	if (!fifo->vdev[vring->vdev_id])
+		return;
+	vdev = &fifo->vdev[vring->vdev_id]->vdev;
+	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+
+	/* Don't continue if another vring is running. */
+	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)
+		return;
+
+	/* tx_reserve is used to reserved some room in FIFO for console. */
+	if (vring->vdev_id == VIRTIO_ID_NET) {
+		hdr_len = sizeof(struct virtio_net_hdr);
+		tx_reserve = fifo->tx_fifo_size / 16;
+	} else {
+		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
+		hdr_len = 0;
+		tx_reserve = 1;
+	}
+
+	desc = vring->desc;
+
+again:
+	while (1) {
+		/* Get available FIFO space. */
+		if (num_avail == 0) {
+			if (is_rx) {
+				/* Get the number of available words in FIFO. */
+				sts = readq(fifo->rx_base + TMFIFO_RX_STS);
+				num_avail = FIELD_GET(
+					TMFIFO_RX_STS__COUNT_MASK, sts);
+
+				/* Don't continue if nothing in FIFO. */
+				if (num_avail <= 0)
+					break;
+			} else {
+				/* Get available space in FIFO. */
+				sts = readq(fifo->tx_base + TMFIFO_TX_STS);
+				num_avail = fifo->tx_fifo_size - tx_reserve -
+					FIELD_GET(
+						TMFIFO_TX_STS__COUNT_MASK, sts);
+
+				if (num_avail <= 0)
+					break;
+			}
+		}
+
+		/* Console output always comes from the Tx buffer. */
+		if (!is_rx && vring->vdev_id == VIRTIO_ID_CONSOLE &&
+		    cons != NULL && cons->tx_buf != NULL) {
+			for (;;) {
+				spin_lock_irqsave(&fifo->spin_lock, flags);
+				if (cons->tx_head == cons->tx_tail) {
+					spin_unlock_irqrestore(
+						&fifo->spin_lock, flags);
+					return;
+				}
+				addr = cons->tx_buf + cons->tx_head;
+				writeq(cpu_to_le64(*(u64 *)addr),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+				tmfifo_vdev_tx_buf_pop(cons, sizeof(u64));
+				spin_unlock_irqrestore(&fifo->spin_lock,
+						       flags);
+				if (--num_avail <= 0)
+					goto again;
+			}
+		}
+
+		/* Get the desc of next packet. */
+		if (!desc) {
+			/* Save the head desc of the chain. */
+			vring->desc_head = tmfifo_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				return;
+			}
+			desc = vring->desc_head;
+			vring->desc = desc;
+
+			if (is_rx && vring->vdev_id == VIRTIO_ID_NET) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+						vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			}
+		}
+
+		/* Beginning of each packet. */
+		if (vring->pkt_len == 0) {
+			int vdev_id, vring_change = 0;
+			union tmfifo_msg_hdr hdr;
+
+			num_avail--;
+
+			/* Read/Write packet length. */
+			if (is_rx) {
+				hdr.data = readq(fifo->rx_base +
+						 TMFIFO_RX_DATA);
+				hdr.data = le64_to_cpu(hdr.data);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (hdr.len == 0)
+					continue;
+
+				/* Check packet type. */
+				if (hdr.type == VIRTIO_ID_NET) {
+					vdev_id = VIRTIO_ID_NET;
+					hdr_len = sizeof(struct virtio_net_hdr);
+				} else if (hdr.type == VIRTIO_ID_CONSOLE) {
+					vdev_id = VIRTIO_ID_CONSOLE;
+					hdr_len = 0;
+				} else {
+					continue;
+				}
+
+				/*
+				 * Check whether the new packet still belongs
+				 * to this vring or not. If not, update the
+				 * pkt_len of the new vring and return.
+				 */
+				if (vdev_id != vring->vdev_id) {
+					struct tmfifo_vdev *dev2 =
+						fifo->vdev[vdev_id];
+
+					if (!dev2)
+						break;
+					vring->desc = desc;
+					vring = &dev2->vrings[TMFIFO_VRING_RX];
+					vring_change = 1;
+				}
+				vring->pkt_len = ntohs(hdr.len) + hdr_len;
+			} else {
+				vring->pkt_len = tmfifo_virtio_get_pkt_len(
+					vdev, desc, vr);
+
+				hdr.data = 0;
+				hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+					VIRTIO_ID_NET :
+					VIRTIO_ID_CONSOLE;
+				hdr.len = htons(vring->pkt_len - hdr_len);
+				writeq(cpu_to_le64(hdr.data),
+				       fifo->tx_base + TMFIFO_TX_DATA);
+			}
+
+			vring->cur_len = hdr_len;
+			vring->rem_len = vring->pkt_len;
+			fifo->vring[is_rx] = vring;
+
+			if (vring_change)
+				return;
+			continue;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check if the current desc is already done. */
+		if (vring->cur_len == len)
+			goto check_done;
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		/* Read a word from FIFO for Rx. */
+		if (is_rx) {
+			data = readq(fifo->rx_base + TMFIFO_RX_DATA);
+			data = le64_to_cpu(data);
+		}
+
+		if (vring->cur_len + sizeof(u64) <= len) {
+			/* The whole word. */
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       sizeof(u64));
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       sizeof(u64));
+			}
+			vring->cur_len += sizeof(u64);
+		} else {
+			/* Leftover bytes. */
+			BUG_ON(vring->cur_len > len);
+			if (is_rx) {
+				memcpy(addr + vring->cur_len, &data,
+				       len - vring->cur_len);
+			} else {
+				memcpy(&data, addr + vring->cur_len,
+				       len - vring->cur_len);
+			}
+			vring->cur_len = len;
+		}
+
+		/* Write the word into FIFO for Tx. */
+		if (!is_rx) {
+			writeq(cpu_to_le64(data),
+			       fifo->tx_base + TMFIFO_TX_DATA);
+		}
+
+		num_avail--;
+
+check_done:
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done and release the desc. */
+			tmfifo_release_pkt(vdev, vring, &desc);
+			fifo->vring[is_rx] = NULL;
+
+			/* Notify upper layer that packet is done. */
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			vring_interrupt(0, vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			continue;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool tmfifo_virtio_notify(struct virtqueue *vq)
+{
+	struct tmfifo_vring *vring = (struct tmfifo_vring *)vq->priv;
+	struct tmfifo *fifo = vring->fifo;
+	unsigned long flags;
+
+	/*
+	 * Virtio maintains vrings in pairs, even number ring for Rx
+	 * and odd number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX HWM bit to start Rx. */
+		if (!test_and_set_bit(TM_RX_HWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	} else {
+		/*
+		 * Console could make blocking call with interrupts disabled.
+		 * In such case, the vring needs to be served right away. For
+		 * other cases, just set the TX LWM bit to start Tx in the
+		 * worker handler.
+		 */
+		if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+			spin_lock_irqsave(&fifo->spin_lock, flags);
+			tmfifo_console_output(fifo->vdev[VIRTIO_ID_CONSOLE],
+					      vq);
+			spin_unlock_irqrestore(&fifo->spin_lock, flags);
+			schedule_work(&fifo->work);
+		} else if (!test_and_set_bit(TM_TX_LWM_IRQ, &fifo->pend_events))
+			schedule_work(&fifo->work);
+	}
+
+	return true;
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void tmfifo_work_handler(struct work_struct *work)
+{
+	int i;
+	struct tmfifo_vdev *tm_vdev;
+	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
+
+	if (!fifo->is_ready)
+		return;
+
+	mutex_lock(&fifo->lock);
+
+	/* Tx. */
+	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq_info[TM_TX_LWM_IRQ].irq) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
+					false);
+			}
+		}
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
+		       fifo->irq_info[TM_RX_HWM_IRQ].irq) {
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
+			tm_vdev = fifo->vdev[i];
+			if (tm_vdev != NULL) {
+				tmfifo_virtio_rxtx(
+					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
+					true);
+			}
+		}
+	}
+
+	mutex_unlock(&fifo->lock);
+}
+
+/* Get the array of feature bits for this device. */
+static u64 tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+		vring = &tm_vdev->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			tmfifo_release_pkt(&tm_vdev->vdev, vring, &vring->desc);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+				  unsigned int nvqs,
+				  struct virtqueue *vqs[],
+				  vq_callback_t *callbacks[],
+				  const char * const names[],
+				  const bool *ctx,
+				  struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct tmfifo_vring *vring;
+	struct virtqueue *vq;
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &tm_vdev->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+		vq = vring_new_virtqueue(i, vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 tmfifo_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vqs[i] = vq;
+		vring->vq = vq;
+		vq->priv = vring;
+	}
+
+	return 0;
+
+error:
+	tmfifo_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void tmfifo_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void tmfifo_virtio_get(struct virtio_device *vdev,
+			      unsigned int offset,
+			      void *buf,
+			      unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void tmfifo_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct tmfifo_vdev *tm_vdev = container_of(vdev, struct tmfifo_vdev,
+						   vdev);
+
+	if (offset + len > sizeof(tm_vdev->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops tmfifo_virtio_config_ops = {
+	.get_features = tmfifo_virtio_get_features,
+	.finalize_features = tmfifo_virtio_finalize_features,
+	.find_vqs = tmfifo_virtio_find_vqs,
+	.del_vqs = tmfifo_virtio_del_vqs,
+	.reset = tmfifo_virtio_reset,
+	.set_status = tmfifo_virtio_set_status,
+	.get_status = tmfifo_virtio_get_status,
+	.get = tmfifo_virtio_get,
+	.set = tmfifo_virtio_set,
+};
+
+/* Create vdev type in a tmfifo. */
+int tmfifo_create_vdev(struct tmfifo *fifo, int vdev_id, u64 features,
+		       void *config, u32 size)
+{
+	struct tmfifo_vdev *tm_vdev;
+	int ret = 0;
+
+	mutex_lock(&fifo->lock);
+
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev != NULL) {
+		pr_err("vdev %d already exists\n", vdev_id);
+		ret = -EEXIST;
+		goto already_exist;
+	}
+
+	tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+	if (!tm_vdev) {
+		ret = -ENOMEM;
+		goto already_exist;
+	}
+
+	tm_vdev->vdev.id.device = vdev_id;
+	tm_vdev->vdev.config = &tmfifo_virtio_config_ops;
+	tm_vdev->vdev.dev.parent = &fifo->pdev->dev;
+	tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+	tm_vdev->features = features;
+	if (config)
+		memcpy(&tm_vdev->config, config, size);
+	if (tmfifo_alloc_vrings(fifo, tm_vdev, vdev_id)) {
+		pr_err("Unable to allocate vring\n");
+		ret = -ENOMEM;
+		goto alloc_vring_fail;
+	}
+	if (vdev_id == VIRTIO_ID_CONSOLE) {
+		tm_vdev->tx_buf = kmalloc(TMFIFO_CONS_TX_BUF_SIZE,
+					  GFP_KERNEL);
+	}
+	fifo->vdev[vdev_id] = tm_vdev;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(&tm_vdev->vdev);
+	if (ret) {
+		dev_err(&fifo->pdev->dev, "register_virtio_device() failed\n");
+		goto register_fail;
+	}
+
+	mutex_unlock(&fifo->lock);
+	return 0;
+
+register_fail:
+	tmfifo_free_vrings(fifo, vdev_id);
+	fifo->vdev[vdev_id] = NULL;
+alloc_vring_fail:
+	kfree(tm_vdev);
+already_exist:
+	mutex_unlock(&fifo->lock);
+	return ret;
+}
+
+/* Delete vdev type from a tmfifo. */
+int tmfifo_delete_vdev(struct tmfifo *fifo, int vdev_id)
+{
+	struct tmfifo_vdev *tm_vdev;
+
+	mutex_lock(&fifo->lock);
+
+	/* Unregister vdev. */
+	tm_vdev = fifo->vdev[vdev_id];
+	if (tm_vdev) {
+		unregister_virtio_device(&tm_vdev->vdev);
+		tmfifo_free_vrings(fifo, vdev_id);
+		kfree(tm_vdev->tx_buf);
+		kfree(tm_vdev);
+		fifo->vdev[vdev_id] = NULL;
+	}
+
+	mutex_unlock(&fifo->lock);
+
+	return 0;
+}
+
+/* Device remove function. */
+static int tmfifo_remove(struct platform_device *pdev)
+{
+	int i;
+	struct tmfifo *fifo = platform_get_drvdata(pdev);
+	struct resource *rx_res, *tx_res;
+
+	if (fifo) {
+		mutex_lock(&tmfifo_lock);
+
+		fifo->is_ready = false;
+
+		/* Stop the timer. */
+		del_timer_sync(&fifo->timer);
+
+		/* Release interrupts. */
+		tmfifo_free_irqs(fifo);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&fifo->work);
+
+		for (i = 0; i < TMFIFO_VDEV_MAX; i++)
+			tmfifo_delete_vdev(fifo, i);
+
+		/* Release IO resources. */
+		if (fifo->rx_base)
+			iounmap(fifo->rx_base);
+		if (fifo->tx_base)
+			iounmap(fifo->tx_base);
+
+		platform_set_drvdata(pdev, NULL);
+		kfree(fifo);
+
+		mutex_unlock(&tmfifo_lock);
+	}
+
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (rx_res)
+		release_mem_region(rx_res->start, resource_size(rx_res));
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (tx_res)
+		release_mem_region(tx_res->start, resource_size(tx_res));
+
+	return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void tmfifo_get_cfg_mac(u8 *mac)
+{
+	u8 buf[6];
+	efi_status_t status;
+	unsigned long size = sizeof(buf);
+	efi_char16_t name[] = { 'R', 's', 'h', 'i', 'm', 'M', 'a', 'c',
+				'A', 'd', 'd', 'r', 0 };
+	efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+
+	status = efi.get_variable(name, &guid, NULL, &size, buf);
+	if (status == EFI_SUCCESS && size == sizeof(buf))
+		memcpy(mac, buf, sizeof(buf));
+}
+
+/* Probe the TMFIFO. */
+static int tmfifo_probe(struct platform_device *pdev)
+{
+	u64 ctl;
+	struct tmfifo *fifo;
+	struct resource *rx_res, *tx_res;
+	struct virtio_net_config net_config;
+	int i, ret;
+
+	/* Get the resource of the Rx & Tx FIFO. */
+	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!rx_res || !tx_res) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (request_mem_region(rx_res->start,
+			       resource_size(rx_res), "bf-tmfifo") == NULL) {
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	if (request_mem_region(tx_res->start,
+			       resource_size(tx_res), "bf-tmfifo") == NULL) {
+		release_mem_region(rx_res->start, resource_size(rx_res));
+		ret = -EBUSY;
+		goto early_err;
+	}
+
+	ret = -ENOMEM;
+	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
+	if (!fifo)
+		goto err;
+
+	fifo->pdev = pdev;
+	platform_set_drvdata(pdev, fifo);
+
+	spin_lock_init(&fifo->spin_lock);
+	INIT_WORK(&fifo->work, tmfifo_work_handler);
+
+	timer_setup(&fifo->timer, tmfifo_timer, 0);
+	fifo->timer.function = tmfifo_timer;
+
+	for (i = 0; i < TM_IRQ_CNT; i++) {
+		fifo->irq_info[i].index = i;
+		fifo->irq_info[i].fifo = fifo;
+		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+		ret = request_irq(fifo->irq_info[i].irq, tmfifo_irq_handler, 0,
+				  "tmfifo", &fifo->irq_info[i]);
+		if (ret) {
+			pr_err("Unable to request irq\n");
+			fifo->irq_info[i].irq = 0;
+			goto err;
+		}
+	}
+
+	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
+	if (!fifo->rx_base)
+		goto err;
+
+	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
+	if (!fifo->tx_base)
+		goto err;
+
+	/* Get Tx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
+	fifo->tx_fifo_size =
+		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
+	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
+	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
+
+	/* Get Rx FIFO size and set the low/high watermark. */
+	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
+	fifo->rx_fifo_size =
+		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
+	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
+		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
+	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
+
+	mutex_init(&fifo->lock);
+
+	/* Create the console vdev. */
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+	if (ret)
+		goto err;
+
+	/* Create the network vdev. */
+	memset(&net_config, 0, sizeof(net_config));
+	net_config.mtu = TMFIFO_NET_MTU;
+	net_config.status = VIRTIO_NET_S_LINK_UP;
+	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
+	tmfifo_get_cfg_mac(net_config.mac);
+	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
+				 &net_config, sizeof(net_config));
+	if (ret)
+		goto err;
+
+	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
+
+	fifo->is_ready = true;
+
+	return 0;
+
+err:
+	tmfifo_remove(pdev);
+early_err:
+	dev_err(&pdev->dev, "Probe Failed\n");
+	return ret;
+}
+
+static const struct of_device_id tmfifo_match[] = {
+	{ .compatible = "mellanox,bf-tmfifo" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, tmfifo_match);
+
+static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
+	{ "MLNXBF01", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
+
+static struct platform_driver tmfifo_driver = {
+	.probe = tmfifo_probe,
+	.remove = tmfifo_remove,
+	.driver = {
+		.name = "bf-tmfifo",
+		.of_match_table = tmfifo_match,
+		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
+	},
+};
+
+static int __init tmfifo_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&tmfifo_driver);
+	if (ret)
+		pr_err("Failed to register tmfifo driver.\n");
+
+	return ret;
+}
+
+static void __exit tmfifo_exit(void)
+{
+	platform_driver_unregister(&tmfifo_driver);
+}
+
+module_init(tmfifo_init);
+module_exit(tmfifo_exit);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
new file mode 100644
index 0000000..9f21764
--- /dev/null
+++ b/drivers/soc/mellanox/tmfifo_regs.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __TMFIFO_REGS_H__
+#define __TMFIFO_REGS_H__
+
+#include <linux/types.h>
+
+#define TMFIFO_TX_DATA 0x0
+
+#define TMFIFO_TX_STS 0x8
+#define TMFIFO_TX_STS__LENGTH 0x0001
+#define TMFIFO_TX_STS__COUNT_SHIFT 0
+#define TMFIFO_TX_STS__COUNT_WIDTH 9
+#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_TX_CTL 0x10
+#define TMFIFO_TX_CTL__LENGTH 0x0001
+#define TMFIFO_TX_CTL__LWM_SHIFT 0
+#define TMFIFO_TX_CTL__LWM_WIDTH 8
+#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__LWM_RMASK 0xff
+#define TMFIFO_TX_CTL__LWM_MASK  0xff
+#define TMFIFO_TX_CTL__HWM_SHIFT 8
+#define TMFIFO_TX_CTL__HWM_WIDTH 8
+#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_TX_CTL__HWM_RMASK 0xff
+#define TMFIFO_TX_CTL__HWM_MASK  0xff00
+#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#define TMFIFO_RX_DATA 0x0
+
+#define TMFIFO_RX_STS 0x8
+#define TMFIFO_RX_STS__LENGTH 0x0001
+#define TMFIFO_RX_STS__COUNT_SHIFT 0
+#define TMFIFO_RX_STS__COUNT_WIDTH 9
+#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
+#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
+
+#define TMFIFO_RX_CTL 0x10
+#define TMFIFO_RX_CTL__LENGTH 0x0001
+#define TMFIFO_RX_CTL__LWM_SHIFT 0
+#define TMFIFO_RX_CTL__LWM_WIDTH 8
+#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__LWM_RMASK 0xff
+#define TMFIFO_RX_CTL__LWM_MASK  0xff
+#define TMFIFO_RX_CTL__HWM_SHIFT 8
+#define TMFIFO_RX_CTL__HWM_WIDTH 8
+#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define TMFIFO_RX_CTL__HWM_RMASK 0xff
+#define TMFIFO_RX_CTL__HWM_MASK  0xff00
+#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
+
+#endif /* !defined(__TMFIFO_REGS_H__) */
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (10 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit introduces config option for Mellanox BlueField SoC,
which can be used to build the SoC specific drivers, and enables
it by default in configs/defconfig.

Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 arch/arm64/Kconfig.platforms | 6 ++++++
 arch/arm64/configs/defconfig | 1 +
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 51bc479..a9b10a0 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -119,6 +119,12 @@ config ARCH_MESON
 	help
 	  This enables support for the Amlogic S905 SoCs.
 
+config ARCH_MLNX_BLUEFIELD
+	bool "Mellanox BlueField SoC Family"
+	select SOC_MLNX
+	help
+	  This enables support for the Mellanox BlueField SoC.
+
 config ARCH_MVEBU
 	bool "Marvell EBU SoC Family"
 	select ARMADA_AP806_SYSCON
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index c9a57d1..85dbef0 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -44,6 +44,7 @@ CONFIG_ARCH_LG1K=y
 CONFIG_ARCH_HISI=y
 CONFIG_ARCH_MEDIATEK=y
 CONFIG_ARCH_MESON=y
+CONFIG_ARCH_MLNX_BLUEFIELD=y
 CONFIG_ARCH_MVEBU=y
 CONFIG_ARCH_QCOM=y
 CONFIG_ARCH_ROCKCHIP=y
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (11 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add devicetree bindings for the TmFifo which is found on Mellanox
BlueField SoCs.

Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 .../devicetree/bindings/soc/mellanox/tmfifo.txt    | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt

diff --git a/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
new file mode 100644
index 0000000..8a13fa6
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
@@ -0,0 +1,23 @@
+* Mellanox BlueField SoC TmFifo
+
+BlueField TmFifo provides a shared FIFO between the target and the
+external host machine, which can be accessed by external host via
+USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
+to implement virtual console and network interface based on the virtio
+framework.
+
+Required properties:
+
+- compatible:	Should be "mellanox,bf-tmfifo"
+- reg:		Physical base address and length of Rx/Tx block
+- interrupts:	The interrupt number of Rx low water mark, Rx high water mark
+		Tx low water mark, Tx high water mark respectively.
+
+Example:
+
+tmfifo@800a20 {
+	compatible = "mellanox,bf-tmfifo";
+	reg = <0x00800a20 0x00000018
+	       0x00800a40 0x00000018>;
+	interrupts = <41, 42, 43, 44>;
+};
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (12 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

Add maintainer information for Mellanox BlueField SoC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f063443..bb2ea86 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1759,6 +1759,14 @@ S:	Maintained
 F:	drivers/phy/mediatek/
 F:	Documentation/devicetree/bindings/phy/phy-mtk-*
 
+ARM/Mellanox BlueField SoC support
+M:	David Woods <dwoods@mellanox.com>
+M:	Liming Sun <lsun@mellanox.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	drivers/soc/mellanox/*
+F:	Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
+
 ARM/MICREL KS8695 ARCHITECTURE
 M:	Greg Ungerer <gerg@uclinux.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (13 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

An external host can connect to a Mellanox BlueField SoC via an
interface called Rshim. The Rshim driver provides boot, console,
and networking services over this interface. This commit is
the common driver where the other backend (transport) driver will
use.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/Kconfig           |    8 +
 drivers/soc/mellanox/Makefile          |    1 +
 drivers/soc/mellanox/host/Makefile     |    2 +
 drivers/soc/mellanox/host/rshim.c      | 2673 ++++++++++++++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h      |  361 +++++
 drivers/soc/mellanox/host/rshim_regs.h |  152 ++
 6 files changed, 3197 insertions(+)
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h

diff --git a/drivers/soc/mellanox/Kconfig b/drivers/soc/mellanox/Kconfig
index d88efa1..ecd83a4 100644
--- a/drivers/soc/mellanox/Kconfig
+++ b/drivers/soc/mellanox/Kconfig
@@ -16,3 +16,11 @@ config MLNX_BLUEFIELD_TMFIFO
 	  the implementation of a console and network driver.
 
 endif # ARCH_MLNX_BLUEFIELD
+
+config MLNX_BLUEFIELD_HOST
+	tristate "Mellnox BlueField host side drivers"
+	help
+	  If you say yes to this option, then support will be added
+	  for control and communication of Mellanox BlueField SoCs
+	  from an external host via USB or PCI-express.
+
diff --git a/drivers/soc/mellanox/Makefile b/drivers/soc/mellanox/Makefile
index c44c0e2..aaaf2be 100644
--- a/drivers/soc/mellanox/Makefile
+++ b/drivers/soc/mellanox/Makefile
@@ -3,3 +3,4 @@
 # Makefile for Mellanox SoC drivers.
 #
 obj-$(CONFIG_MLNX_BLUEFIELD_TMFIFO)	+= tmfifo.o
+obj-$(CONFIG_MLNX_BLUEFIELD_HOST)	+= host/
diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
new file mode 100644
index 0000000..e47842f
--- /dev/null
+++ b/drivers/soc/mellanox/host/Makefile
@@ -0,0 +1,2 @@
+obj-m := rshim.o
+
diff --git a/drivers/soc/mellanox/host/rshim.c b/drivers/soc/mellanox/host/rshim.c
new file mode 100644
index 0000000..32f1124
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.c
@@ -0,0 +1,2673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_common.c - Mellanox host-side driver for RShim
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.	See the GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+#include <linux/delay.h>
+#include <linux/virtio_ids.h>
+
+#include "rshim.h"
+
+/* Maximum number of devices controlled by this driver. */
+int rshim_nr_devs = 64;
+module_param(rshim_nr_devs, int, 0444);
+MODULE_PARM_DESC(rshim_nr_devs, "Maximum number of supported devices");
+
+static char *backend_driver = "";
+module_param(backend_driver, charp, 0444);
+MODULE_PARM_DESC(backend_driver, "Rshim backend driver to use");
+
+static int rshim_keepalive_period = 300;
+module_param(rshim_keepalive_period, int, 0644);
+MODULE_PARM_DESC(rshim_keepalive_period, "keepalive period in milliseconds");
+
+#define RSH_KEEPALIVE_MAGIC_NUM 0x5089836482ULL
+
+/* Circular buffer macros. */
+
+#define read_empty(bd, chan) \
+	(CIRC_CNT((bd)->read_fifo[chan].head, \
+		  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_full(bd, chan) \
+	(CIRC_SPACE((bd)->read_fifo[chan].head, \
+		    (bd)->read_fifo[chan].tail, READ_FIFO_SIZE) == 0)
+#define read_space(bd, chan) \
+	CIRC_SPACE((bd)->read_fifo[chan].head, \
+		   (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt(bd, chan) \
+	CIRC_CNT((bd)->read_fifo[chan].head, \
+		 (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->read_fifo[chan].head, \
+			(bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_data_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + \
+	 ((bd)->read_fifo[chan].tail & (READ_FIFO_SIZE - 1)))
+#define read_consume_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].tail = \
+		((bd)->read_fifo[chan].tail + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->read_fifo[chan].head, \
+			  (bd)->read_fifo[chan].tail, READ_FIFO_SIZE)
+#define read_space_offset(bd, chan) \
+	((bd)->read_fifo[chan].head & (READ_FIFO_SIZE - 1))
+#define read_space_ptr(bd, chan) \
+	((bd)->read_fifo[chan].data + read_space_offset(bd, (chan)))
+#define read_add_bytes(bd, chan, nbytes) \
+	((bd)->read_fifo[chan].head = \
+		((bd)->read_fifo[chan].head + (nbytes)) & \
+		 (READ_FIFO_SIZE - 1))
+#define read_reset(bd, chan) \
+	((bd)->read_fifo[chan].head = (bd)->read_fifo[chan].tail = 0)
+
+#define write_empty(bd, chan) \
+	(CIRC_CNT((bd)->write_fifo[chan].head, \
+		  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_full(bd, chan) \
+	(CIRC_SPACE((bd)->write_fifo[chan].head, \
+		    (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE) == 0)
+#define write_space(bd, chan) \
+	CIRC_SPACE((bd)->write_fifo[chan].head, \
+		   (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt(bd, chan) \
+	CIRC_CNT((bd)->write_fifo[chan].head, \
+		 (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_cnt_to_end(bd, chan) \
+	CIRC_CNT_TO_END((bd)->write_fifo[chan].head, \
+			(bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_data_offset(bd, chan) \
+	((bd)->write_fifo[chan].tail & (WRITE_FIFO_SIZE - 1))
+#define write_data_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + write_data_offset(bd, (chan)))
+#define write_consume_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].tail = \
+		 ((bd)->write_fifo[chan].tail + (nbytes)) & \
+		  (WRITE_FIFO_SIZE - 1))
+#define write_space_to_end(bd, chan) \
+	CIRC_SPACE_TO_END((bd)->write_fifo[chan].head, \
+			  (bd)->write_fifo[chan].tail, WRITE_FIFO_SIZE)
+#define write_space_ptr(bd, chan) \
+	((bd)->write_fifo[chan].data + \
+	 ((bd)->write_fifo[chan].head & (WRITE_FIFO_SIZE - 1)))
+#define write_add_bytes(bd, chan, nbytes) \
+	((bd)->write_fifo[chan].head = \
+	 ((bd)->write_fifo[chan].head + (nbytes)) & \
+	  (WRITE_FIFO_SIZE - 1))
+#define write_reset(bd, chan) \
+	((bd)->write_fifo[chan].head = (bd)->write_fifo[chan].tail = 0)
+
+/*
+ * Tile-to-host bits (UART 0 scratchpad).
+ */
+/*
+ * Output write pointer mask.  Note that this is the maximum size; the
+ * write pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_OUT_WPTR_MASK     0x3FF
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/*
+ * Input read pointer mask.  Note that this is the maximum size; the read
+ * pointer may be smaller if requested by the host.
+ */
+#define CONS_RSHIM_T2H_IN_RPTR_MASK      0x1FF800
+
+/* Input read pointer shift. */
+#define CONS_RSHIM_T2H_IN_RPTR_SHIFT     11
+
+/* Tile is done mask. */
+#define CONS_RSHIM_T2H_DONE_MASK         0x400
+
+/* Number of words to send as sync-data (calculated by packet MTU). */
+#define TMFIFO_MAX_SYNC_WORDS            (1536 / 8)
+
+/* Terminal characteristics for newly created consoles. */
+static struct ktermios init_console_termios = {
+	.c_iflag = INLCR | ICRNL,
+	.c_oflag = OPOST | ONLCR,
+	.c_cflag = B115200 | HUPCL | CLOCAL | CREAD | CS8,
+	.c_lflag = ISIG | ICANON | ECHOE | ECHOK | ECHOCTL | ECHOKE | IEXTEN,
+	.c_line = 0,
+	.c_cc = INIT_C_CC,
+};
+
+/* Global mutex. */
+static DEFINE_MUTEX(rshim_mutex);
+
+/*
+ * Array of all of the rshim devices.  The high bits of our minor number
+ * index into this table to find the relevant device.
+ */
+struct rshim_backend **rshim_devs;
+
+/*
+ * Work queue. Right now we have one for the whole driver; we might
+ * eventually decide that we need one per device, but we'll see.
+ */
+struct workqueue_struct *rshim_wq;
+EXPORT_SYMBOL(rshim_wq);
+
+/*
+ * Array of pointers to kmalloc'ed strings, holding the path name for
+ * all of the devices we've seen.  If rshim_devs[i] is non-NULL, then
+ * rshim_dev_names[i] is its path name.  If rshim_devs[i] is NULL, then
+ * rshim_dev_names[i] is the name that was last used for that device.
+ * When we see a new device, we look it up in this table; this allows us to
+ * use the same device index we did last time we saw the device.  The
+ * strings within the array persist until the driver is unloaded.
+ */
+char **rshim_dev_names;
+
+/* Name of the sub-device types. */
+char *rshim_dev_minor_names[RSH_DEV_TYPES] = {
+	[RSH_DEV_TYPE_RSHIM] = "rshim",
+	[RSH_DEV_TYPE_BOOT] = "boot",
+	[RSH_DEV_TYPE_CONSOLE] = "console",
+	[RSH_DEV_TYPE_NET] = "net",
+	[RSH_DEV_TYPE_MISC] = "misc",
+};
+
+/* dev_t base index. */
+static dev_t rshim_dev_base;
+
+/* Class structure for our device class. */
+static struct class *rshim_class;
+
+/* Registered services. */
+static struct rshim_service *rshim_svc[RSH_SVC_MAX];
+
+/* FIFO reset. */
+static void rshim_fifo_reset(struct rshim_backend *bd);
+
+/* Global lock / unlock. */
+
+void rshim_lock(void)
+{
+	mutex_lock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_lock);
+
+void rshim_unlock(void)
+{
+	mutex_unlock(&rshim_mutex);
+}
+EXPORT_SYMBOL(rshim_unlock);
+
+/*
+ * Read some bytes from RShim.
+ *
+ * The provided buffer size should be multiple of 8 bytes. If not, the
+ * leftover bytes (which presumably were sent as NUL bytes by the sender)
+ * will be discarded.
+ */
+static ssize_t rshim_read_default(struct rshim_backend *bd, int devtype,
+				char *buf, size_t count)
+{
+	int retval, total = 0, avail = 0;
+	u64 word;
+
+	/* Read is only supported for RShim TMFIFO. */
+	if (devtype != RSH_DEV_TYPE_NET && devtype != RSH_DEV_TYPE_CONSOLE) {
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+	if (bd->is_boot_open)
+		return 0;
+
+	while (total < count) {
+		if (avail == 0) {
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+						RSH_TM_TILE_TO_HOST_STS, &word);
+			if (retval < 0)
+				break;
+			avail = word & RSH_TM_TILE_TO_HOST_STS__COUNT_MASK;
+			if (avail == 0)
+				break;
+		}
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_TILE_TO_HOST_DATA, &word);
+		if (retval < 0)
+			break;
+		/*
+		 * Convert it to little endian before sending to RShim. The
+		 * other side should decode it as little endian as well which
+		 * is usually the default case.
+		 */
+		word = le64_to_cpu(word);
+		if (total + sizeof(word) <= count) {
+			*(u64 *)buf = word;
+			buf += sizeof(word);
+			total += sizeof(word);
+		} else {
+			/* Copy the rest data which is less than 8 bytes. */
+			memcpy(buf, &word, count - total);
+			total = count;
+			break;
+		}
+		avail--;
+	}
+
+	return total;
+}
+
+/*
+ * Write some bytes to the RShim backend.
+ *
+ * If count is not multiple of 8-bytes, the data will be padded to 8-byte
+ * aligned which is required by RShim HW.
+ */
+static ssize_t rshim_write_delayed(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	u64 word;
+	char pad_buf[sizeof(u64)] = { 0 };
+	int size_addr, size_mask, data_addr, max_size;
+	int retval, avail = 0, byte_cnt = 0, retry;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+		size_addr = RSH_TM_HOST_TO_TILE_STS;
+		size_mask = RSH_TM_HOST_TO_TILE_STS__COUNT_MASK;
+		data_addr = RSH_TM_HOST_TO_TILE_DATA;
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+					RSH_TM_HOST_TO_TILE_CTL, &word);
+		if (retval < 0) {
+			pr_err("read_rshim error %d\n", retval);
+			return retval;
+		}
+		max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+			   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		size_addr = RSH_BOOT_FIFO_COUNT;
+		size_mask = RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK;
+		data_addr = RSH_BOOT_FIFO_DATA;
+		max_size = RSH_BOOT_FIFO_SIZE;
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+
+	while (byte_cnt < count) {
+		/* Check the boot cancel condition. */
+		if (devtype == RSH_DEV_TYPE_BOOT && !bd->boot_work_buf)
+			break;
+
+		/* Add padding if less than 8 bytes left. */
+		if (byte_cnt + sizeof(u64) > count) {
+			memcpy(pad_buf, buf, count - byte_cnt);
+			buf = (const char *)pad_buf;
+		}
+
+		retry = 0;
+		while (avail <= 0) {
+			/* Calculate available space in words. */
+			retval = bd->read_rshim(bd, RSHIM_CHANNEL, size_addr,
+						&word);
+			if (retval < 0) {
+				pr_err("read_rshim error %d\n", retval);
+				break;
+			}
+			avail = max_size - (int)(word & size_mask) - 8;
+			if (avail > 0)
+				break;
+
+			/*
+			 * Retry 100s, or else return failure since the other
+			 * side seems not to be responding.
+			 */
+			if (++retry > 100000)
+				return -ETIMEDOUT;
+			msleep(1);
+		}
+
+		word = *(u64 *)buf;
+		/*
+		 * Convert to little endian before sending to RShim. The
+		 * receiving side should call le64_to_cpu() to convert
+		 * it back.
+		 */
+		word = cpu_to_le64(word);
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, data_addr, word);
+		if (retval < 0) {
+			pr_err("write_rshim error %d\n", retval);
+			break;
+		}
+		buf += sizeof(word);
+		byte_cnt += sizeof(word);
+		avail--;
+	}
+
+	/* Return number shouldn't count the padded bytes. */
+	return (byte_cnt > count) ? count : byte_cnt;
+}
+
+static ssize_t rshim_write_default(struct rshim_backend *bd, int devtype,
+				   const char *buf, size_t count)
+{
+	int retval;
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (bd->is_boot_open)
+			return count;
+
+		/* Set the flag so there is only one outstanding request. */
+		bd->spin_flags |= RSH_SFLG_WRITING;
+
+		/* Wake up the worker. */
+		bd->fifo_work_buf = (char *)buf;
+		bd->fifo_work_buf_len = count;
+		bd->fifo_work_devtype = devtype;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * has_fifo_work flag.
+		 */
+		wmb();
+		bd->has_fifo_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+		return 0;
+
+	case RSH_DEV_TYPE_BOOT:
+		reinit_completion(&bd->boot_write_complete);
+		bd->boot_work_buf_len = count;
+		bd->boot_work_buf_actual_len = 0;
+		/*
+		 * Add barrier so the above writes complete before setting the
+		 * boot_work_buf pointer since it's checked in other places.
+		 */
+		wmb();
+		bd->boot_work_buf = (char *)buf;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+		mutex_unlock(&bd->mutex);
+		retval = wait_for_completion_interruptible(
+					&bd->boot_write_complete);
+		/* Cancel the request if interrupted. */
+		if (retval)
+			bd->boot_work_buf = NULL;
+
+		mutex_lock(&bd->mutex);
+		return bd->boot_work_buf_actual_len;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+/* Boot file operations routines */
+
+/*
+ * Wait for boot to complete, if necessary.  Return 0 if the boot is done
+ * and it's safe to continue, an error code if something went wrong.  Note
+ * that this routine must be called with the device mutex held.  If it
+ * returns successfully, the mutex will still be held (although it may have
+ * been dropped and reacquired); if it returns unsuccessfully the mutex
+ * will have been dropped.
+ */
+static int wait_for_boot_done(struct rshim_backend *bd)
+{
+	int retval;
+
+	if (!bd->has_reprobe)
+		return 0;
+
+	if (!bd->has_rshim || bd->is_booting) {
+		while (bd->is_booting) {
+			pr_info("boot write, waiting for re-probe\n");
+			/* We're booting, and the backend isn't ready yet. */
+			mutex_unlock(&bd->mutex);
+			/*
+			 * FIXME: might we want a timeout here, too?  If
+			 * the reprobe takes a very long time, something's
+			 * probably wrong.  Maybe a couple of minutes?
+			 */
+			retval = wait_for_completion_interruptible(
+				&bd->booting_complete);
+			if (retval)
+				return retval;
+			mutex_lock(&bd->mutex);
+		}
+		if (!bd->has_rshim) {
+			mutex_unlock(&bd->mutex);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t rshim_boot_write(struct file *file, const char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0, whichbuf = 0;
+	size_t bytes_written = 0, bytes_left;
+
+	/*
+	 * Hardware requires that we send multiples of 8 bytes.  Ideally
+	 * we'd handle the case where we got unaligned writes by
+	 * accumulating the residue somehow, but none of our clients
+	 * typically do this, so we just clip the size to prevent any
+	 * inadvertent errors from causing hardware problems.
+	 */
+	bytes_left = count & (-((size_t)8));
+	if (!bytes_left)
+		return 0;
+
+	mutex_lock(&bd->mutex);
+	if (bd->is_in_boot_write) {
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	retval = wait_for_boot_done(bd);
+	if (retval) {
+		pr_err("boot_write: wait for boot failed, err %d\n", retval);
+		/* wait_for_boot_done already dropped mutex */
+		return retval;
+	}
+
+	/*
+	 * We're going to drop the mutex while we wait for any outstanding
+	 * write to complete; this keeps another thread from getting in here
+	 * while we do that.
+	 */
+	bd->is_in_boot_write = 1;
+
+	while (bytes_left) {
+		size_t buf_bytes = min((size_t)BOOT_BUF_SIZE, bytes_left);
+		char *buf = bd->boot_buf[whichbuf];
+
+		whichbuf ^= 1;
+		if (copy_from_user(buf, user_buffer, buf_bytes)) {
+			retval = -EFAULT;
+			pr_err("boot_write: copy from user failed\n");
+			break;
+		}
+
+		retval = bd->write(bd, RSH_DEV_TYPE_BOOT, buf, buf_bytes);
+		if (retval > 0) {
+			bytes_left -= retval;
+			user_buffer += retval;
+			bytes_written += retval;
+		} else if (retval == 0) {
+			/* Wait for some time instead of busy polling. */
+			msleep_interruptible(1);
+			continue;
+		}
+		if (retval != buf_bytes)
+			break;
+	}
+
+	bd->is_in_boot_write = 0;
+	mutex_unlock(&bd->mutex);
+
+	/*
+	 * Return an error in case the 'count' is not multiple of 8 bytes.
+	 * At this moment, the truncated data has already been sent to
+	 * the BOOT fifo and hopefully it could still boot the chip.
+	 */
+	if (count % 8 != 0)
+		return -EINVAL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+static int rshim_boot_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+	int retval;
+
+	/* Restore the boot mode register. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+				 RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC);
+	if (retval)
+		pr_err("couldn't set boot_control, err %d\n", retval);
+
+	mutex_lock(&bd->mutex);
+	bd->is_boot_open = 0;
+	queue_delayed_work(rshim_wq, &bd->work, HZ);
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_boot_fops = {
+	.owner = THIS_MODULE,
+	.write = rshim_boot_write,
+	.release = rshim_boot_release,
+};
+
+int rshim_boot_open(struct file *file)
+{
+	int retval;
+	int i;
+	struct rshim_backend *bd = file->private_data;
+#if RSH_RESET_MUTEX
+	unsigned long devs_locked = 0;
+#endif
+
+	file->f_op = &rshim_boot_fops;
+
+#if RSH_RESET_MUTEX
+	/*
+	 * We're going to prevent resets and operations from running in
+	 * parallel with other resets.  Our method for this is to grab
+	 * every device's mutex before doing the reset, and then holding
+	 * onto them until the device we reset is reprobed, or a timeout
+	 * expires; the latter is mostly paranoia.  Anyway, in order to
+	 * find all of the other devices, we're going to need to walk the
+	 * device table, so we need to grab its mutex.  We have to do it
+	 * before we get our own device's mutex for lock ordering reasons.
+	 */
+	rshim_lock();
+#endif
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_boot_open) {
+		pr_info("can't boot, boot file already open\n");
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -EBUSY;
+	}
+
+	if (!bd->has_rshim) {
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return -ENODEV;
+	}
+
+	pr_info("begin booting\n");
+	reinit_completion(&bd->booting_complete);
+	bd->is_booting = 1;
+
+	/*
+	 * Before we reset the chip, make sure we don't have any
+	 * outstanding writes, and flush the write and read FIFOs. (Note
+	 * that we can't have any outstanding reads, since we kill those
+	 * upon release of the TM FIFO file.)
+	 */
+	if (bd->cancel)
+		bd->cancel(bd, RSH_DEV_TYPE_NET, true);
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_pkt_padding = 0;
+	spin_lock_irq(&bd->spinlock);
+	/* FIXME: should we be waiting for WRITING to go off, instead? */
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+
+	/* Set RShim (external) boot mode. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE);
+	if (retval) {
+		pr_err("boot_open: error %d writing boot control\n", retval);
+		bd->is_booting = 0;
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		rshim_unlock();
+#endif
+		return retval;
+	}
+
+#if RSH_RESET_MUTEX
+	/*
+	 * Acquire all of the other devices' mutexes, to keep them from
+	 * doing anything while we're performing the reset.  Also kill
+	 * any outstanding boot urbs; that way we'll restart them, after
+	 * the reset is done, and not report errors to the writers.
+	 */
+	for (i = 0; i < rshim_nr_devs; i++) {
+		if (rshim_devs[i] && rshim_devs[i] != bd) {
+			mutex_lock(&rshim_devs[i]->mutex);
+			devs_locked |= 1UL << i;
+			if (rshim_devs[i]->cancel) {
+				rshim_devs[i]->cancel(rshim_devs[i],
+						    RSH_DEV_TYPE_BOOT, true);
+			}
+		}
+	}
+	reinit_completion(&bd->reset_complete);
+#endif
+
+	bd->is_boot_open = 1;
+
+	/* SW reset. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_RESET_CONTROL,
+				 RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+	/* Reset the TmFifo. */
+	rshim_fifo_reset(bd);
+
+	/*
+	 * Note that occasionally, we get various errors on writing to
+	 * the reset register.  This appears to be caused by the chip
+	 * actually resetting before the response goes out, or perhaps by
+	 * our noticing the device unplug before we've seen the response.
+	 * Either way, the chip _does_ actually reset, so we just ignore
+	 * the error.  Should we ever start getting these errors without
+	 * the chip being reset, we'll have to figure out how to handle
+	 * this more intelligently.  (One potential option is to not reset
+	 * directly, but to set up a down counter to do the reset, but that
+	 * seems kind of kludgy, especially since Tile software might also
+	 * be trying to use the down counter.)
+	 */
+	if (retval && retval != -EPROTO && retval != -ESHUTDOWN &&
+#ifdef RSH_USB_BMC
+	    /*
+	     * The host driver on the BMC sometimes produces EOVERFLOW on
+	     * reset.  It also seems to have seems to have some sort of bug
+	     * which makes it return more bytes than we actually wrote!  In
+	     * that case we're returning EBADE.
+	     */
+	    retval != -EOVERFLOW && retval != -EBADE &&
+#endif
+	    retval != -ETIMEDOUT && retval != -EPIPE) {
+		pr_err("boot_open: error %d writing reset control\n", retval);
+		mutex_unlock(&bd->mutex);
+#if RSH_RESET_MUTEX
+		while (devs_locked) {
+			int i = __builtin_ctzl(devs_locked);
+
+			mutex_unlock(&rshim_devs[i]->mutex);
+			devs_locked &= ~(1UL << i);
+		}
+		rshim_unlock();
+#endif
+		bd->is_boot_open = 0;
+
+		return retval;
+	}
+
+	if (retval)
+		pr_err("boot_open: got error %d on reset write\n", retval);
+
+	mutex_unlock(&bd->mutex);
+
+#if RSH_RESET_MUTEX
+	rshim_unlock();
+	/*
+	 * We wait for reset_complete (signaled by probe), or for an
+	 * interrupt, or a timeout (set to 5s because of no re-probe
+	 * in the PCIe case). Note that we dropped dev->mutex above
+	 * so that probe can run; the BOOT_OPEN flag should keep our device
+	 * from trying to do anything before the device is reprobed.
+	 */
+	retval = wait_for_completion_interruptible_timeout(&bd->reset_complete,
+							   5 * HZ);
+	if (retval == 0)
+		pr_err("timed out waiting for device reprobe after reset\n");
+
+	while (devs_locked) {
+		int i = __builtin_ctz(devs_locked);
+
+		mutex_unlock(&rshim_devs[i]->mutex);
+		devs_locked &= ~(1UL << i);
+	}
+#endif
+
+	return 0;
+}
+
+/* FIFO common file operations routines */
+
+/*
+ * Signal an error on the FIFO, and wake up anyone who might need to know
+ * about it.
+ */
+static void rshim_fifo_err(struct rshim_backend *bd, int err)
+{
+	int i;
+
+	bd->tmfifo_error = err;
+	wake_up_interruptible_all(&bd->write_completed);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		wake_up_interruptible_all(&bd->read_fifo[i].operable);
+		wake_up_interruptible_all(&bd->write_fifo[i].operable);
+	}
+}
+
+/* Drain the read buffer, and start another read/interrupt if needed. */
+static void rshim_fifo_input(struct rshim_backend *bd)
+{
+	union rshim_tmfifo_msg_hdr *hdr;
+	bool rx_avail = false;
+
+	if (bd->is_boot_open)
+		return;
+
+again:
+	while (bd->read_buf_next < bd->read_buf_bytes) {
+		int copysize;
+
+		/*
+		 * If we're at the start of a packet, then extract the
+		 * header, and update our count of bytes remaining in the
+		 * packet.
+		 */
+		if (bd->read_buf_pkt_rem == 0) {
+			/* Make sure header is received. */
+			if (bd->read_buf_next + sizeof(*hdr) >
+				bd->read_buf_bytes)
+				break;
+
+			pr_debug("next hdr %d\n", bd->read_buf_next);
+
+			hdr = (union rshim_tmfifo_msg_hdr *)
+				&bd->read_buf[bd->read_buf_next];
+
+			bd->read_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+			bd->read_buf_pkt_padding =
+				(8 - (bd->read_buf_pkt_rem & 7)) & 7;
+			if (hdr->type == VIRTIO_ID_NET)
+				bd->rx_chan = TMFIFO_NET_CHAN;
+			else if (hdr->type == VIRTIO_ID_CONSOLE) {
+				bd->rx_chan = TMFIFO_CONS_CHAN;
+				/* Strip off the message header for console. */
+				bd->read_buf_next += sizeof(*hdr);
+				bd->read_buf_pkt_rem -= sizeof(*hdr);
+				if (bd->read_buf_pkt_rem == 0)
+					continue;
+			} else {
+				pr_debug("bad type %d, drop it", hdr->type);
+				bd->read_buf_pkt_rem = 0;
+				bd->read_buf_pkt_padding = 0;
+				bd->read_buf_next = bd->read_buf_bytes;
+				break;
+			}
+
+			pr_debug("drain: hdr, nxt %d rem %d chn %d\n",
+			      bd->read_buf_next, bd->read_buf_pkt_rem,
+			      bd->rx_chan);
+			bd->drop = 0;
+		}
+
+		if (bd->rx_chan == TMFIFO_CONS_CHAN &&
+		    !(bd->spin_flags & RSH_SFLG_CONS_OPEN)) {
+			/*
+			 * If data is coming in for a closed console
+			 * channel, we want to just throw it away.
+			 * Resetting the channel every time through this
+			 * loop is a relatively cheap way to do that.  Note
+			 * that this works because the read buffer is no
+			 * larger than the read FIFO; thus, we know that if
+			 * we reset it here, we will always be able to
+			 * drain the read buffer of any console data, and
+			 * will then launch another read.
+			 */
+			read_reset(bd, TMFIFO_CONS_CHAN);
+			bd->drop = 1;
+		} else if (bd->rx_chan == TMFIFO_NET_CHAN && bd->net == NULL) {
+			/* Drop if networking is not enabled. */
+			read_reset(bd, TMFIFO_NET_CHAN);
+			bd->drop = 1;
+		}
+
+		copysize = min(bd->read_buf_pkt_rem,
+			       bd->read_buf_bytes - bd->read_buf_next);
+		copysize = min(copysize,
+			       read_space_to_end(bd, bd->rx_chan));
+
+		pr_debug("drain: copysize %d, head %d, tail %d, remaining %d\n",
+			 copysize, bd->read_fifo[bd->rx_chan].head,
+			 bd->read_fifo[bd->rx_chan].tail,
+			 bd->read_buf_pkt_rem);
+
+		if (copysize == 0) {
+			/*
+			 * We have data, but no space to put it in, so
+			 * we're done.
+			 */
+			pr_debug("drain: no more space in channel %d\n",
+				 bd->rx_chan);
+			break;
+		}
+
+		if (!bd->drop) {
+			memcpy(read_space_ptr(bd, bd->rx_chan),
+			       &bd->read_buf[bd->read_buf_next],
+			       copysize);
+			read_add_bytes(bd, bd->rx_chan, copysize);
+		}
+
+		bd->read_buf_next += copysize;
+		bd->read_buf_pkt_rem -= copysize;
+
+		wake_up_interruptible_all(&bd->read_fifo[
+				      bd->rx_chan].operable);
+		pr_debug("woke up readable chan %d\n", bd->rx_chan);
+
+		if (bd->read_buf_pkt_rem <= 0) {
+			bd->read_buf_next = bd->read_buf_next +
+				bd->read_buf_pkt_padding;
+			rx_avail = true;
+		}
+	}
+
+	/*
+	 * We've processed all of the data we can, so now we decide if we
+	 * need to launch another I/O.  If there's still data in the read
+	 * buffer, or if we're already reading, don't launch any new
+	 * operations.  If an interrupt just completed, and said there was
+	 * data, or the last time we did a read we got some data, then do
+	 * another read.  Otherwise, do an interrupt.
+	 */
+	if (bd->read_buf_next < bd->read_buf_bytes ||
+	    (bd->spin_flags & RSH_SFLG_READING)) {
+		/* We're doing nothing. */
+		pr_debug("fifo_input: no new read: %s\n",
+			 (bd->read_buf_next < bd->read_buf_bytes) ?
+			 "have data" : "already reading");
+	} else {
+		int len;
+
+		/* Process it if more data is received. */
+		len = bd->read(bd, RSH_DEV_TYPE_NET, (char *)bd->read_buf,
+			      READ_BUF_SIZE);
+		if (len > 0) {
+			bd->read_buf_bytes = len;
+			bd->read_buf_next = 0;
+			goto again;
+		}
+	}
+
+	if (rx_avail) {
+		if (bd->rx_chan == TMFIFO_NET_CHAN) {
+			struct rshim_service *svc;
+
+			/*
+			 * Protect rshim_svc with RCU lock. See comments in
+			 * rshim_register_service() / rshim_register_service()
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[RSH_SVC_NET]);
+			if (svc != NULL)
+				(*svc->rx_notify)(bd);
+			rcu_read_unlock();
+		}
+	}
+}
+
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user)
+{
+	size_t rd_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t readsize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_read, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more data in the read FIFO.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/ENODEV\n", rd_cnt);
+			return rd_cnt ? rd_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_read: returning %zd/%d\n", rd_cnt,
+			      bd->tmfifo_error);
+			return rd_cnt ? rd_cnt : bd->tmfifo_error;
+		}
+
+		if (read_empty(bd, chan)) {
+			pr_debug("fifo_read: fifo empty\n");
+			if (rd_cnt || nonblock) {
+				if (rd_cnt == 0) {
+					spin_lock_irq(&bd->spinlock);
+					rshim_fifo_input(bd);
+					spin_unlock_irq(&bd->spinlock);
+				}
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returning %zd/EAGAIN\n",
+				      rd_cnt);
+				return rd_cnt ? rd_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+
+			pr_debug("fifo_read: waiting for readable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+					bd->read_fifo[chan].operable,
+					    !read_empty(bd, chan))) {
+				pr_debug("fifo_read: returning ERESTARTSYS\n");
+				return to_user ? -EINTR : -ERESTARTSYS;
+			}
+
+			mutex_lock(&bd->mutex);
+
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		/*
+		 * Figure out how many bytes we will transfer on this pass.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		readsize = min(count, (size_t)read_cnt(bd, chan));
+
+		pass1 = min(readsize, (size_t)read_cnt_to_end(bd, chan));
+		pass2 = readsize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_read: readsize %zd, head %d, tail %d\n",
+			 readsize, bd->read_fifo[chan].head,
+			 bd->read_fifo[chan].tail);
+
+		if (!to_user) {
+			memcpy(buffer, read_data_ptr(bd, chan), pass1);
+			if (pass2) {
+				memcpy(buffer + pass1,
+				       bd->read_fifo[chan].data, pass2);
+			}
+		} else {
+			if (copy_to_user(buffer, read_data_ptr(bd, chan),
+				pass1) || (pass2 && copy_to_user(buffer + pass1,
+				bd->read_fifo[chan].data, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_read: returns %zd/EFAULT\n",
+					 rd_cnt);
+				return rd_cnt ? rd_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		read_consume_bytes(bd, chan, readsize);
+
+		/*
+		 * We consumed some bytes, so let's see if we can process
+		 * any more incoming data.
+		 */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= readsize;
+		buffer += readsize;
+		rd_cnt += readsize;
+		pr_debug("fifo_read: transferred %zd bytes\n", readsize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_read: returning %zd\n", rd_cnt);
+	return rd_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_read);
+
+static void rshim_fifo_output(struct rshim_backend *bd)
+{
+	int writesize, write_buf_next = 0;
+	int write_avail = WRITE_BUF_SIZE - write_buf_next;
+	int numchan = TMFIFO_MAX_CHAN;
+	int chan, chan_offset;
+
+	/* If we're already writing, we have nowhere to put data. */
+	if (bd->spin_flags & RSH_SFLG_WRITING)
+		return;
+
+	/* Walk through all the channels, sending as much data as possible. */
+	for (chan_offset = 0; chan_offset < numchan; chan_offset++) {
+		/*
+		 * Pick the current channel if not done, otherwise round-robin
+		 * to the next channel.
+		 */
+		if (bd->write_buf_pkt_rem > 0)
+			chan = bd->tx_chan;
+		else {
+			u16 cur_len;
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+
+			chan = bd->tx_chan = (bd->tx_chan + 1) % numchan;
+			cur_len = write_cnt(bd, chan);
+
+			/*
+			 * Set up message header for console data which is byte
+			 * stream. Network packets already have the message
+			 * header included.
+			 */
+			if (chan == TMFIFO_CONS_CHAN) {
+				if (cur_len == 0)
+					continue;
+				hdr->data = 0;
+				hdr->type = VIRTIO_ID_CONSOLE;
+				hdr->len = htons(cur_len);
+			} else {
+				int pass1;
+
+				if (cur_len <
+					sizeof(union rshim_tmfifo_msg_hdr))
+					continue;
+
+				pass1 = write_cnt_to_end(bd, chan);
+				if (pass1 >= sizeof(*hdr)) {
+					hdr = (union rshim_tmfifo_msg_hdr *)
+						write_data_ptr(bd, chan);
+				} else {
+					memcpy(hdr, write_data_ptr(bd, chan),
+					       pass1);
+					memcpy((u8 *)hdr + pass1,
+					       bd->write_fifo[chan].data,
+					       sizeof(*hdr) - pass1);
+				}
+			}
+
+			bd->write_buf_pkt_rem = ntohs(hdr->len) + sizeof(*hdr);
+		}
+
+		/* Send out the packet header for the console data. */
+		if (chan == TMFIFO_CONS_CHAN &&
+		    bd->write_buf_pkt_rem > ntohs(bd->msg_hdr.len)) {
+			union rshim_tmfifo_msg_hdr *hdr = &bd->msg_hdr;
+			int left = bd->write_buf_pkt_rem - ntohs(hdr->len);
+			u8 *pos = (u8 *)hdr + sizeof(*hdr) - left;
+
+			writesize = min(write_avail, left);
+			memcpy(&bd->write_buf[write_buf_next], pos, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			write_avail -= writesize;
+
+			/*
+			 * Don't continue if no more space for the header.
+			 * It'll be picked up next time.
+			 */
+			if (left != writesize)
+				break;
+		}
+
+		writesize = min(write_avail, (int)write_cnt(bd, chan));
+		writesize = min(writesize, bd->write_buf_pkt_rem);
+
+		/*
+		 * The write size should be aligned to 8 bytes unless for the
+		 * last block, which will be padded at the end.
+		 */
+		if (bd->write_buf_pkt_rem != writesize)
+			writesize &= -8;
+
+		if (writesize > 0) {
+			int pass1;
+			int pass2;
+
+			pass1 = min(writesize,
+				    (int)write_cnt_to_end(bd, chan));
+			pass2 = writesize - pass1;
+
+			pr_debug("fifo_outproc: chan %d, writesize %d, next %d,"
+				 " head %d, tail %d\n",
+				 chan, writesize, write_buf_next,
+				 bd->write_fifo[chan].head,
+				 bd->write_fifo[chan].tail);
+
+			memcpy(&bd->write_buf[write_buf_next],
+			       write_data_ptr(bd, chan), pass1);
+			memcpy(&bd->write_buf[write_buf_next + pass1],
+			       bd->write_fifo[chan].data, pass2);
+
+			write_consume_bytes(bd, chan, writesize);
+			write_buf_next += writesize;
+			bd->write_buf_pkt_rem -= writesize;
+			/* Add padding at the end. */
+			if (bd->write_buf_pkt_rem == 0)
+				write_buf_next = (write_buf_next + 7) & -8;
+			write_avail = WRITE_BUF_SIZE - write_buf_next;
+
+			wake_up_interruptible_all(
+				&bd->write_fifo[chan].operable);
+			pr_debug("woke up writable chan %d\n", chan);
+		}
+	}
+
+	/* Drop the data if it is still booting. */
+	if (bd->is_boot_open)
+		return;
+
+	/* If we actually put anything in the buffer, send it. */
+	if (write_buf_next) {
+		bd->write(bd, RSH_DEV_TYPE_NET, (char *)bd->write_buf,
+			  write_buf_next);
+	}
+}
+
+int rshim_fifo_alloc(struct rshim_backend *bd)
+{
+	int i, allocfail = 0;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		if (!bd->read_fifo[i].data)
+			bd->read_fifo[i].data =
+				kmalloc(READ_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= bd->read_fifo[i].data == 0;
+
+		if (!bd->write_fifo[i].data)
+			bd->write_fifo[i].data =
+				kmalloc(WRITE_FIFO_SIZE, GFP_KERNEL);
+		allocfail |= bd->write_fifo[i].data == 0;
+	}
+
+	return allocfail;
+}
+EXPORT_SYMBOL(rshim_fifo_alloc);
+
+static void rshim_fifo_reset(struct rshim_backend *bd)
+{
+	int i;
+
+	bd->read_buf_bytes = 0;
+	bd->read_buf_pkt_rem = 0;
+	bd->read_buf_next = 0;
+	bd->read_buf_pkt_padding = 0;
+	bd->write_buf_pkt_rem = 0;
+	bd->rx_chan = bd->tx_chan = 0;
+
+	spin_lock_irq(&bd->spinlock);
+	bd->spin_flags &= ~(RSH_SFLG_WRITING |
+			    RSH_SFLG_READING);
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		read_reset(bd, i);
+		write_reset(bd, i);
+	}
+	spin_unlock_irq(&bd->spinlock);
+}
+
+void rshim_fifo_free(struct rshim_backend *bd)
+{
+	int i;
+
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		kfree(bd->read_fifo[i].data);
+		bd->read_fifo[i].data = NULL;
+		kfree(bd->write_fifo[i].data);
+		bd->write_fifo[i].data = NULL;
+	}
+
+	rshim_fifo_reset(bd);
+
+	bd->has_tm = 0;
+}
+EXPORT_SYMBOL(rshim_fifo_free);
+
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user)
+{
+	size_t wr_cnt = 0;
+
+	mutex_lock(&bd->mutex);
+
+	while (count) {
+		size_t writesize;
+		int pass1;
+		int pass2;
+
+		pr_debug("fifo_write, top of loop, remaining count %zd\n",
+			 count);
+
+		/*
+		 * We check this each time through the loop since the
+		 * device could get disconnected while we're waiting for
+		 * more space in the write buffer.
+		 */
+		if (!bd->has_tm) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/ENODEV\n", wr_cnt);
+			return wr_cnt ? wr_cnt : -ENODEV;
+		}
+
+		if (bd->tmfifo_error) {
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: returning %zd/%d\n", wr_cnt,
+				 bd->tmfifo_error);
+			return wr_cnt ? wr_cnt : bd->tmfifo_error;
+		}
+
+		if (write_full(bd, chan)) {
+			pr_debug("fifo_write: fifo full\n");
+			if (nonblock) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returning %zd/EAGAIN\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EAGAIN;
+			}
+
+			mutex_unlock(&bd->mutex);
+			pr_debug("fifo_write: waiting for writable chan %d\n",
+				 chan);
+			if (wait_event_interruptible(
+				     bd->write_fifo[chan].operable,
+					     !write_full(bd, chan))) {
+				pr_debug("fifo_write: returning %zd/ERESTARTSYS\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -ERESTARTSYS;
+			}
+			mutex_lock(&bd->mutex);
+			/*
+			 * Since we dropped the mutex, we must make
+			 * sure our interface is still there before
+			 * we do anything else.
+			 */
+			continue;
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		writesize = min(count, (size_t)write_space(bd, chan));
+		pass1 = min(writesize, (size_t)write_space_to_end(bd, chan));
+		pass2 = writesize - pass1;
+
+		spin_unlock_irq(&bd->spinlock);
+
+		pr_debug("fifo_write: writesize %zd, head %d, tail %d\n",
+			 writesize, bd->write_fifo[chan].head,
+			 bd->write_fifo[chan].tail);
+
+		if (!from_user) {
+			memcpy(write_space_ptr(bd, chan), buffer, pass1);
+			if (pass2) {
+				memcpy(bd->write_fifo[chan].data,
+				       buffer + pass1, pass2);
+			}
+		} else {
+			if (copy_from_user(write_space_ptr(bd, chan), buffer,
+				pass1) || (pass2 &&
+				copy_from_user(bd->write_fifo[chan].data,
+						buffer + pass1, pass2))) {
+				mutex_unlock(&bd->mutex);
+				pr_debug("fifo_write: returns %zd/EFAULT\n",
+					 wr_cnt);
+				return wr_cnt ? wr_cnt : -EFAULT;
+			}
+		}
+
+		spin_lock_irq(&bd->spinlock);
+
+		write_add_bytes(bd, chan, writesize);
+
+		/* We have some new bytes, let's see if we can write any. */
+		rshim_fifo_output(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		count -= writesize;
+		buffer += writesize;
+		wr_cnt += writesize;
+		pr_debug("fifo_write: transferred %zd bytes this pass\n",
+			 writesize);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("fifo_write: returning %zd\n", wr_cnt);
+	return wr_cnt;
+}
+EXPORT_SYMBOL(rshim_fifo_write);
+
+static int rshim_fifo_fsync(struct file *file, loff_t start, loff_t end,
+			    int datasync, int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+
+	/*
+	 * To ensure that all of our data has actually made it to the
+	 * device, we first wait until the channel is empty, then we wait
+	 * until there is no outstanding write urb.
+	 */
+	while (!write_empty(bd, chan))
+		if (wait_event_interruptible(bd->write_fifo[chan].operable,
+					     write_empty(bd, chan))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	while (bd->spin_flags & RSH_SFLG_WRITING)
+		if (wait_event_interruptible(bd->write_completed,
+					     !(bd->spin_flags &
+					       RSH_SFLG_WRITING))) {
+			mutex_unlock(&bd->mutex);
+			return -ERESTARTSYS;
+		}
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+static unsigned int rshim_fifo_poll(struct file *file, poll_table *wait,
+				  int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	unsigned int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	poll_wait(file, &bd->read_fifo[chan].operable, wait);
+	poll_wait(file, &bd->write_fifo[chan].operable, wait);
+
+	spin_lock_irq(&bd->spinlock);
+
+	if (!read_empty(bd, chan))
+		retval |= POLLIN | POLLRDNORM;
+	if (!write_full(bd, chan))
+		retval |= POLLOUT | POLLWRNORM;
+	/*
+	 * We don't report POLLERR on the console so that it doesn't get
+	 * automatically disconnected when it fails, and so that you can
+	 * connect to it in the error state before rebooting the target.
+	 * This is inconsistent, but being consistent turns out to be very
+	 * annoying.  If someone tries to actually type on it, they'll
+	 * get an error.
+	 */
+	if (bd->tmfifo_error && chan != TMFIFO_CONS_CHAN)
+		retval |= POLLERR;
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	pr_debug("poll chan %d file %p returns 0x%x\n", chan, file, retval);
+
+	return retval;
+}
+
+
+static int rshim_fifo_release(struct inode *inode, struct file *file,
+			      int chan)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	mutex_lock(&bd->mutex);
+
+	if (chan == TMFIFO_CONS_CHAN) {
+		/*
+		 * If we aren't the last console file, nothing to do but
+		 * fix the reference count.
+		 */
+		bd->console_opens--;
+		if (bd->console_opens) {
+			mutex_unlock(&bd->mutex);
+			return 0;
+		}
+
+		/*
+		 * We've told the host to stop using the TM FIFO console,
+		 * but there may be a lag before it does.  Unless we
+		 * continue to read data from the console stream, the host
+		 * may spin forever waiting for the console to be drained
+		 * and not realize that it's time to stop using it.
+		 * Clearing the CONS_OPEN spin flag will discard any future
+		 * incoming console data, but if our input buffers are full
+		 * now, we might not be even reading from the hardware
+		 * FIFO.  To avoid problems, clear the buffers and call the
+		 * drainer so that it knows there's space.
+		 */
+		spin_lock_irq(&bd->spinlock);
+
+		bd->spin_flags &= ~RSH_SFLG_CONS_OPEN;
+
+		read_reset(bd, TMFIFO_CONS_CHAN);
+		write_reset(bd, TMFIFO_CONS_CHAN);
+
+		if (bd->has_tm)
+			rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	if (chan == TMFIFO_CONS_CHAN)
+		bd->is_cons_open = 0;
+	else
+		bd->is_tm_open = 0;
+
+	if (!bd->is_tm_open && !bd->is_cons_open) {
+		if (bd->cancel)
+			bd->cancel(bd, RSH_DEV_TYPE_NET, false);
+
+		spin_lock_irq(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_READING;
+		spin_unlock_irq(&bd->spinlock);
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+/* TMFIFO file operations routines */
+
+static ssize_t rshim_tmfifo_read(struct file *file, char *user_buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+static ssize_t rshim_tmfifo_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_NET_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+static int rshim_tmfifo_fsync(struct file *file, loff_t start,
+			      loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_NET_CHAN);
+}
+
+static unsigned int rshim_tmfifo_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_NET_CHAN);
+}
+
+static int rshim_tmfifo_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_NET_CHAN);
+}
+
+static const struct file_operations rshim_tmfifo_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_tmfifo_read,
+	.write = rshim_tmfifo_write,
+	.fsync = rshim_tmfifo_fsync,
+	.poll = rshim_tmfifo_poll,
+	.release = rshim_tmfifo_release,
+};
+
+static int rshim_tmfifo_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_tmfifo_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_tm_open) {
+		pr_debug("tmfifo_open: file already open\n");
+		mutex_unlock(&bd->mutex);
+		return -EBUSY;
+	}
+
+	bd->is_tm_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	/* Call the drainer to do an initial read, if needed. */
+	rshim_fifo_input(bd);
+
+	spin_unlock_irq(&bd->spinlock);
+
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+/* Console file operations routines */
+
+static void rshim_work_handler(struct work_struct *work)
+{
+	struct rshim_backend *bd = container_of((struct delayed_work *) work,
+					      struct rshim_backend, work);
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->keepalive && bd->has_rshim) {
+		bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+				RSH_KEEPALIVE_MAGIC_NUM);
+		bd->keepalive = 0;
+	}
+
+	if (bd->boot_work_buf != NULL) {
+		bd->boot_work_buf_actual_len = rshim_write_delayed(bd,
+							RSH_DEV_TYPE_BOOT,
+							bd->boot_work_buf,
+							bd->boot_work_buf_len);
+		bd->boot_work_buf = NULL;
+		complete_all(&bd->boot_write_complete);
+	}
+
+	if (bd->is_boot_open) {
+		mutex_unlock(&bd->mutex);
+		return;
+	}
+
+	if (bd->has_fifo_work) {
+		int len;
+
+		len = rshim_write_delayed(bd, bd->fifo_work_devtype,
+					  bd->fifo_work_buf,
+					  bd->fifo_work_buf_len);
+		bd->has_fifo_work = 0;
+
+		spin_lock(&bd->spinlock);
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		if (len == bd->fifo_work_buf_len) {
+			wake_up_interruptible_all(&bd->write_completed);
+			rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+		} else {
+			pr_err("fifo_write: completed abnormally.\n");
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, -1);
+		}
+		spin_unlock(&bd->spinlock);
+	}
+
+	if (bd->has_cons_work) {
+		spin_lock_irq(&bd->spinlock);
+
+		/* FIFO output. */
+		rshim_fifo_output(bd);
+
+		/* FIFO input. */
+		rshim_fifo_input(bd);
+
+		spin_unlock_irq(&bd->spinlock);
+
+		bd->has_cons_work = 0;
+	}
+
+	if (!bd->has_reprobe && bd->is_cons_open) {
+		bd->has_cons_work = 1;
+		mod_timer(&bd->timer, jiffies + HZ / 10);
+	}
+
+	mutex_unlock(&bd->mutex);
+}
+
+static ssize_t rshim_console_read(struct file *file, char *user_buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_read(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			     file->f_flags & O_NONBLOCK, true);
+}
+
+static ssize_t rshim_console_write(struct file *file, const char *user_buffer,
+				 size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	return rshim_fifo_write(bd, user_buffer, count, TMFIFO_CONS_CHAN,
+			      file->f_flags & O_NONBLOCK, true);
+}
+
+static int rshim_console_fsync(struct file *file, loff_t start,
+			       loff_t end, int datasync)
+{
+	return rshim_fifo_fsync(file, start, end, datasync, TMFIFO_CONS_CHAN);
+}
+
+static long rshim_console_unlocked_ioctl(struct file *file, unsigned int
+				       cmd, unsigned long arg)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval = 0;
+
+	mutex_lock(&bd->mutex);
+
+	switch (cmd) {
+	case TCGETS: {
+#ifdef TCGETS2
+		if (kernel_termios_to_user_termios_1(
+			(struct termios __user *)arg, &bd->cons_termios))
+#else
+		if (kernel_termios_to_user_termios(
+			(struct termios __user *)arg, &bd->cons_termios))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	case TCSETS:
+	case TCSETSW:
+	case TCSETSF: {
+#ifdef TCGETS2
+		if (user_termios_to_kernel_termios_1(
+			&bd->cons_termios, (struct termios __user *)arg))
+#else
+		if (user_termios_to_kernel_termios(
+			&bd->cons_termios, (struct termios __user *)arg))
+#endif
+			retval = -EFAULT;
+		break;
+	}
+
+	default:
+		retval = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	return retval;
+}
+
+static unsigned int rshim_console_poll(struct file *file, poll_table *wait)
+{
+	return rshim_fifo_poll(file, wait, TMFIFO_CONS_CHAN);
+}
+
+static int rshim_console_release(struct inode *inode, struct file *file)
+{
+	return rshim_fifo_release(inode, file, TMFIFO_CONS_CHAN);
+}
+
+static const struct file_operations rshim_console_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_console_read,
+	.write = rshim_console_write,
+	.fsync = rshim_console_fsync,
+	.unlocked_ioctl = rshim_console_unlocked_ioctl,
+	.poll = rshim_console_poll,
+	.release = rshim_console_release,
+};
+
+static int rshim_console_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+
+	file->f_op = &rshim_console_fops;
+
+	mutex_lock(&bd->mutex);
+
+	if (bd->is_cons_open) {
+		/*
+		 * The console is already open.  This is OK, but it means
+		 * there's no work to do other than updating the reference
+		 * count.
+		 */
+		bd->console_opens++;
+		mutex_unlock(&bd->mutex);
+		return 0;
+	}
+
+	bd->is_cons_open = 1;
+
+	spin_lock_irq(&bd->spinlock);
+
+	bd->spin_flags |= RSH_SFLG_CONS_OPEN;
+
+	spin_unlock_irq(&bd->spinlock);
+
+	if (!bd->has_cons_work) {
+		bd->has_cons_work = 1;
+		queue_delayed_work(rshim_wq, &bd->work, HZ / 10);
+	}
+
+	bd->console_opens++;
+	mutex_unlock(&bd->mutex);
+
+	return 0;
+}
+
+static int rshim_boot_done(struct rshim_backend *bd)
+{
+	if (bd->has_rshim && bd->has_tm) {
+		/* Clear any previous errors. */
+		bd->tmfifo_error = 0;
+
+		/*
+		 * If someone might be waiting for the device to come up,
+		 * tell them it's ready.
+		 */
+		if (bd->is_booting) {
+			bd->is_booting = 0;
+
+			pr_debug("signaling booting complete\n");
+			complete_all(&bd->booting_complete);
+#if RSH_RESET_MUTEX
+			complete_all(&bd->reset_complete);
+#endif
+		};
+
+		/* If the console device is open, start the worker. */
+		if (bd->is_cons_open && !bd->has_cons_work) {
+			bd->has_cons_work = 1;
+			pr_debug("probe: console_work submitted\n");
+			queue_delayed_work(rshim_wq, &bd->work, 0);
+		}
+
+		/* Tell the user this device is now attached. */
+		pr_info("%s now attached\n", rshim_dev_names[bd->dev_index]);
+	}
+
+	return 0;
+}
+
+/* Rshim file operations routines */
+
+static ssize_t rshim_rshim_read(struct file *file, char *user_buffer,
+			      size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->read_rshim(bd,
+				(*ppos >> 16) & 0xF, /* channel # */
+				*ppos & 0xFFFF,	 /* addr */
+				&buf);
+	mutex_unlock(&bd->mutex);
+
+	/* If the read was successful, copy the data to userspace */
+	if (!retval && copy_to_user(user_buffer, &buf, count))
+		return -EFAULT;
+
+	return retval ? retval : count;
+}
+
+static ssize_t rshim_rshim_write(struct file *file, const char *user_buffer,
+			       size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0;
+	u64 buf;
+
+	/* rshim registers are all 8-byte aligned. */
+	if (count != 8 || (*ppos & 7) != 0)
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(&buf, user_buffer, count))
+		return -EFAULT;
+
+	bd = file->private_data;
+
+	mutex_lock(&bd->mutex);
+	retval = bd->write_rshim(bd,
+				 (*ppos >> 16) & 0xF, /* channel # */
+				 *ppos & 0xFFFF, /* addr */
+				 buf);
+	mutex_unlock(&bd->mutex);
+
+	return retval ? retval : count;
+}
+
+static int rshim_rshim_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	struct module *owner;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_rshim_fops = {
+	.owner = THIS_MODULE,
+	.read = rshim_rshim_read,
+	.write = rshim_rshim_write,
+	.release = rshim_rshim_release,
+	.llseek = default_llseek,
+};
+
+static int rshim_rshim_open(struct file *file)
+{
+	file->f_op = &rshim_rshim_fops;
+
+	return 0;
+}
+
+/* Misc file operations routines */
+
+static int
+rshim_misc_seq_show(struct seq_file *s, void *token)
+{
+	struct rshim_backend *bd = s->private;
+	int retval;
+	u64 value;
+
+	/* Boot mode. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				&value);
+	if (retval) {
+		pr_err("couldn't read rshim register\n");
+		return retval;
+	}
+	seq_printf(s, "BOOT_MODE %lld\n",
+		   value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+
+	/* SW reset flag is always 0. */
+	seq_printf(s, "SW_RESET  %d\n", 0);
+
+	/* Display the driver name. */
+	seq_printf(s, "DRV_NAME  %s\n", bd->owner->name);
+
+	return 0;
+}
+
+static ssize_t rshim_misc_write(struct file *file, const char *user_buffer,
+				size_t count, loff_t *ppos)
+{
+	struct rshim_backend *bd;
+	int retval = 0, value;
+	char buf[64], key[32];
+
+	if (*ppos != 0 || count >= sizeof(buf))
+		return -EINVAL;
+
+	/* Copy the data from userspace */
+	if (copy_from_user(buf, user_buffer, count))
+		return -EFAULT;
+
+	if (sscanf(buf, "%s %x", key, &value) != 2)
+		return -EINVAL;
+
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	if (strcmp(key, "BOOT_MODE") == 0) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_BOOT_CONTROL,
+				 value & RSH_BOOT_CONTROL__BOOT_MODE_MASK);
+	} else if (strcmp(key, "SW_RESET") == 0) {
+		if (value) {
+			if (!bd->has_reprobe) {
+				/* Detach, which shouldn't hold bd->mutex. */
+				rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+				mutex_lock(&bd->mutex);
+				/* Reset the TmFifo. */
+				rshim_fifo_reset(bd);
+				mutex_unlock(&bd->mutex);
+			}
+
+			retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					RSH_RESET_CONTROL,
+					RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY);
+
+			if (!bd->has_reprobe) {
+				/* Attach. */
+				msleep_interruptible(1000);
+				mutex_lock(&bd->mutex);
+				rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+				mutex_unlock(&bd->mutex);
+			}
+		}
+	} else
+		return -EINVAL;
+
+	return retval ? retval : count;
+}
+
+static int rshim_misc_release(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	struct module *owner;
+	int retval;
+
+	/*
+	 * Note that since this got turned into a seq file by
+	 * rshim_misc_open(), our device pointer isn't in the usual spot
+	 * (the file's private data); that's used by the seq file
+	 * subsystem.
+	 */
+	bd = ((struct seq_file *)file->private_data)->private;
+
+	retval = single_release(inode, file);
+	if (retval)
+		return retval;
+
+	rshim_lock();
+	owner = RSHIM_READ_ONCE(bd->owner);
+	kref_put(&bd->kref, bd->destroy);
+	module_put(owner);
+	rshim_unlock();
+
+	return 0;
+}
+
+static const struct file_operations rshim_misc_fops = {
+	.owner = THIS_MODULE,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.write = rshim_misc_write,
+	.release = rshim_misc_release,
+};
+
+static int rshim_misc_open(struct file *file)
+{
+	struct rshim_backend *bd = file->private_data;
+	int retval;
+
+	/*
+	 * If file->private_data is non-NULL, seq_open (called by
+	 * single_open) thinks it's already a seq_file struct, and
+	 * scribbles over it!  Very bad.
+	 */
+	file->private_data = NULL;
+
+	file->f_op = &rshim_misc_fops;
+	retval = single_open(file, rshim_misc_seq_show, bd);
+
+	return retval;
+}
+
+/* Common file operations routines */
+
+static int rshim_open(struct inode *inode, struct file *file)
+{
+	struct rshim_backend *bd;
+	int subminor = iminor(inode);
+	int retval;
+
+	rshim_lock();
+
+	bd = rshim_devs[subminor / RSH_DEV_TYPES];
+	if (!bd) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Add a reference to the owner. */
+	if (!try_module_get(bd->owner)) {
+		rshim_unlock();
+		return -ENODEV;
+	}
+
+	/* Increment our usage count for the device. */
+	kref_get(&bd->kref);
+
+	rshim_unlock();
+
+	file->private_data = bd;
+
+	switch (subminor % RSH_DEV_TYPES) {
+	case RSH_DEV_TYPE_BOOT:
+		retval = rshim_boot_open(file);
+		break;
+
+	case RSH_DEV_TYPE_RSHIM:
+		retval = rshim_rshim_open(file);
+		break;
+
+	case RSH_DEV_TYPE_CONSOLE:
+		retval = rshim_console_open(file);
+		break;
+
+	case RSH_DEV_TYPE_NET:
+		retval = rshim_tmfifo_open(file);
+		break;
+
+	case RSH_DEV_TYPE_MISC:
+		retval = rshim_misc_open(file);
+		break;
+
+	default:
+		retval = -ENODEV;
+		break;
+	}
+
+	/* If the minor open failed, drop the usage count. */
+	if (retval < 0) {
+		struct module *owner;
+
+		rshim_lock();
+		owner = RSHIM_READ_ONCE(bd->owner);
+		kref_put(&bd->kref, bd->destroy);
+		module_put(owner);
+		rshim_unlock();
+	}
+
+	return retval;
+}
+
+static const struct file_operations rshim_fops = {
+	.owner = THIS_MODULE,
+	.open =	rshim_open,
+};
+
+int rshim_tmfifo_sync(struct rshim_backend *bd)
+{
+	u64 word;
+	int i, retval, max_size, avail;
+	union rshim_tmfifo_msg_hdr hdr;
+
+	/* Get FIFO max size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL,
+				RSH_TM_HOST_TO_TILE_CTL, &word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	max_size = (word >> RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT)
+		   & RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK;
+
+	/* Calculate available size. */
+	retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_TM_HOST_TO_TILE_STS,
+				&word);
+	if (retval < 0) {
+		pr_err("read_rshim error %d\n", retval);
+		return retval;
+	}
+	avail = max_size - (int)(word & RSH_TM_HOST_TO_TILE_STS__COUNT_MASK);
+
+	if (avail > TMFIFO_MAX_SYNC_WORDS)
+		avail = TMFIFO_MAX_SYNC_WORDS;
+
+	hdr.type = VIRTIO_ID_NET;
+	hdr.len = 0;
+	for (i = 0; i < avail; i++) {
+		retval = bd->write_rshim(bd, RSHIM_CHANNEL,
+					 RSH_TM_HOST_TO_TILE_STS, hdr.data);
+		if (retval < 0)
+			break;
+	}
+
+	return 0;
+}
+
+int rshim_notify(struct rshim_backend *bd, int event, int code)
+{
+	int i, rc = 0;
+	struct rshim_service *svc;
+
+	switch (event) {
+	case RSH_EVENT_FIFO_INPUT:
+		rshim_fifo_input(bd);
+		break;
+
+	case RSH_EVENT_FIFO_OUTPUT:
+		rshim_fifo_output(bd);
+		break;
+
+	case RSH_EVENT_FIFO_ERR:
+		rshim_fifo_err(bd, code);
+		break;
+
+	case RSH_EVENT_ATTACH:
+		rshim_boot_done(bd);
+
+		/* Sync-up the tmfifo if reprobe is not supported. */
+		if (!bd->has_reprobe && bd->has_rshim)
+			rshim_tmfifo_sync(bd);
+
+		rcu_read_lock();
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL && svc->create != NULL) {
+				rc = (*svc->create)(bd);
+				if (rc == -EEXIST)
+					rc = 0;
+				else if (rc) {
+					pr_err("Failed to attach svc %d\n", i);
+					break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		spin_lock_irq(&bd->spinlock);
+		rshim_fifo_input(bd);
+		spin_unlock_irq(&bd->spinlock);
+		break;
+
+	case RSH_EVENT_DETACH:
+		for (i = 0; i < RSH_SVC_MAX; i++) {
+			/*
+			 * The svc->delete() could call into Linux kernel and
+			 * potentially trigger synchronize_rcu(). So it should
+			 * be outside of the rcu_read_lock(). Instead, a ref
+			 * counter is used here to avoid race condition between
+			 * svc deletion such as caused by kernel module unload.
+			 */
+			rcu_read_lock();
+			svc = rcu_dereference(rshim_svc[i]);
+			if (svc != NULL)
+				atomic_inc(&svc->ref);
+			rcu_read_unlock();
+
+			if (svc != NULL) {
+				(*svc->delete)(bd);
+				atomic_dec(&svc->ref);
+			}
+		}
+		bd->dev = NULL;
+		break;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL(rshim_notify);
+
+static int rshim_find_index(char *dev_name)
+{
+	int i, dev_index = -1;
+
+	/* First look for a match with a previous device name. */
+	for (i = 0; i < rshim_nr_devs; i++)
+		if (rshim_dev_names[i] &&
+		    !strcmp(dev_name, rshim_dev_names[i])) {
+			pr_debug("found match with previous at index %d\n", i);
+			dev_index = i;
+			break;
+		}
+
+	/* Then look for a never-used slot. */
+	if (dev_index < 0) {
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (!rshim_dev_names[i]) {
+				pr_debug("found never-used slot %d\n", i);
+				dev_index = i;
+				break;
+			}
+	}
+
+	/* Finally look for a currently-unused slot. */
+	if (dev_index < 0) {
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (!rshim_devs[i]) {
+				pr_debug("found unused slot %d\n", i);
+				dev_index = i;
+				break;
+			}
+	}
+
+	return dev_index;
+}
+
+struct rshim_backend *rshim_find(char *dev_name)
+{
+	int dev_index = rshim_find_index(dev_name);
+
+	/* If none of that worked, we fail. */
+	if (dev_index < 0) {
+		pr_err("couldn't find slot for new device %s\n", dev_name);
+		return NULL;
+	}
+
+	return rshim_devs[dev_index];
+}
+EXPORT_SYMBOL(rshim_find);
+
+/* House-keeping timer. */
+static void rshim_timer_func(struct timer_list *arg)
+{
+	struct rshim_backend *bd =
+	  container_of(arg, struct rshim_backend, timer);
+
+	u32 period = msecs_to_jiffies(rshim_keepalive_period);
+
+	if (bd->has_cons_work)
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+
+	/* Request keepalive update and restart the ~300ms timer. */
+	if (time_after(jiffies, (unsigned long)bd->last_keepalive + period)) {
+		bd->keepalive = 1;
+		bd->last_keepalive = jiffies;
+		queue_delayed_work(rshim_wq, &bd->work, 0);
+	}
+	mod_timer(&bd->timer, jiffies + period);
+}
+
+static ssize_t rshim_path_show(struct device *cdev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct rshim_backend *bd = dev_get_drvdata(cdev);
+
+	if (bd == NULL)
+		return -ENODEV;
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			rshim_dev_names[bd->dev_index]);
+}
+
+static DEVICE_ATTR(rshim_path, 0444, rshim_path_show, NULL);
+
+static void
+rshim_load_modules(struct work_struct *work)
+{
+	request_module("rshim_net");
+}
+
+static DECLARE_DELAYED_WORK(rshim_load_modules_work, rshim_load_modules);
+
+/* Check whether backend is allowed to register or not. */
+static int rshim_access_check(struct rshim_backend *bd)
+{
+	int i, retval;
+	u64 value;
+
+	/* Write value 0 to RSH_SCRATCHPAD1. */
+	retval = bd->write_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1, 0);
+	if (retval < 0)
+		return -ENODEV;
+
+	/*
+	 * Poll RSH_SCRATCHPAD1 up to one second to check whether it's reset to
+	 * the keepalive magic value, which indicates another backend driver has
+	 * already attached to this target.
+	 */
+	for (i = 0; i < 10; i++) {
+		retval = bd->read_rshim(bd, RSHIM_CHANNEL, RSH_SCRATCHPAD1,
+					&value);
+		if (retval < 0)
+			return -ENODEV;
+
+		if (value == RSH_KEEPALIVE_MAGIC_NUM) {
+			pr_info("another backend already attached.\n");
+			return -EEXIST;
+		}
+
+		msleep(100);
+	}
+
+	return 0;
+}
+
+int rshim_register(struct rshim_backend *bd)
+{
+	int i, retval, dev_index;
+
+	if (bd->registered)
+		return 0;
+
+	if (backend_driver[0] && strcmp(backend_driver, bd->owner->name))
+		return -EACCES;
+
+	dev_index = rshim_find_index(bd->dev_name);
+	if (dev_index < 0)
+		return -ENODEV;
+
+	if (!bd->read_rshim || !bd->write_rshim) {
+		pr_err("read_rshim/write_rshim missing\n");
+		return -EINVAL;
+	}
+
+	retval = rshim_access_check(bd);
+	if (retval)
+		return retval;
+
+	if (!bd->write)
+		bd->write = rshim_write_default;
+	if (!bd->read)
+		bd->read = rshim_read_default;
+
+	kref_init(&bd->kref);
+	spin_lock_init(&bd->spinlock);
+#if RSH_RESET_MUTEX
+	init_completion(&bd->reset_complete);
+#endif
+	for (i = 0; i < TMFIFO_MAX_CHAN; i++) {
+		init_waitqueue_head(&bd->read_fifo[i].operable);
+		init_waitqueue_head(&bd->write_fifo[i].operable);
+	}
+
+	init_waitqueue_head(&bd->write_completed);
+	init_completion(&bd->booting_complete);
+	init_completion(&bd->boot_write_complete);
+	memcpy(&bd->cons_termios, &init_console_termios,
+	       sizeof(init_console_termios));
+	INIT_DELAYED_WORK(&bd->work, rshim_work_handler);
+
+	bd->dev_index = dev_index;
+	if (rshim_dev_names[dev_index] != bd->dev_name) {
+		kfree(rshim_dev_names[dev_index]);
+		rshim_dev_names[dev_index] = bd->dev_name;
+	}
+	rshim_devs[dev_index] = bd;
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		struct device *cl_dev;
+		int err;
+		char devbuf[32];
+
+		cdev_init(&bd->cdevs[i], &rshim_fops);
+		bd->cdevs[i].owner = THIS_MODULE;
+		/*
+		 * FIXME: is this addition really legal, or should
+		 * we be using MKDEV?
+		 */
+		err = cdev_add(&bd->cdevs[i],
+			       rshim_dev_base +
+			       bd->dev_index * RSH_DEV_TYPES + i,
+			       1);
+		/*
+		 * We complain if this fails, but we don't return
+		 * an error; it really shouldn't happen, and it's
+		 * hard to go un-do the rest of the adds.
+		 */
+		if (err)
+			pr_err("rsh%d: couldn't add minor %d\n", dev_index, i);
+
+		cl_dev = device_create(rshim_class, NULL, rshim_dev_base +
+				       bd->dev_index * RSH_DEV_TYPES + i, NULL,
+				       "rshim%d!%s",
+				       bd->dev_index, rshim_dev_minor_names[i]);
+		if (IS_ERR(cl_dev)) {
+			pr_err("rsh%d: couldn't add dev %s, err %ld\n",
+			       dev_index,
+			       format_dev_t(devbuf, rshim_dev_base + dev_index *
+					    RSH_DEV_TYPES + i),
+			       PTR_ERR(cl_dev));
+		} else {
+			pr_debug("added class dev %s\n",
+				 format_dev_t(devbuf, rshim_dev_base +
+					      bd->dev_index *
+					      RSH_DEV_TYPES + i));
+		}
+
+		dev_set_drvdata(cl_dev, bd);
+		if (device_create_file(cl_dev, &dev_attr_rshim_path))
+			pr_err("could not create rshim_path file in sysfs\n");
+	}
+
+	for (i = 0; i < 2; i++) {
+		bd->boot_buf[i] = kmalloc(BOOT_BUF_SIZE, GFP_KERNEL);
+		if (!bd->boot_buf[i]) {
+			if (i == 1) {
+				kfree(bd->boot_buf[0]);
+				bd->boot_buf[0] = NULL;
+			}
+		}
+	}
+
+	timer_setup(&bd->timer, rshim_timer_func, 0);
+
+	bd->registered = 1;
+
+	/* Start the keepalive timer. */
+	bd->last_keepalive = jiffies;
+	mod_timer(&bd->timer, jiffies + 1);
+
+	schedule_delayed_work(&rshim_load_modules_work, 3 * HZ);
+
+	return 0;
+}
+EXPORT_SYMBOL(rshim_register);
+
+void rshim_deregister(struct rshim_backend *bd)
+{
+	int i;
+
+	if (!bd->registered)
+		return;
+
+	/* Stop the timer. */
+	del_timer_sync(&bd->timer);
+
+	for (i = 0; i < 2; i++)
+		kfree(bd->boot_buf[i]);
+
+	for (i = 0; i < RSH_DEV_TYPES; i++) {
+		cdev_del(&bd->cdevs[i]);
+		device_destroy(rshim_class,
+			       rshim_dev_base + bd->dev_index *
+			       RSH_DEV_TYPES + i);
+	}
+
+	rshim_devs[bd->dev_index] = NULL;
+	bd->registered = 0;
+}
+EXPORT_SYMBOL(rshim_deregister);
+
+int rshim_register_service(struct rshim_service *service)
+{
+	int i, retval = 0;
+	struct rshim_service *svc;
+
+	rshim_lock();
+
+	atomic_set(&service->ref, 0);
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	if (!rshim_svc[service->type]) {
+		svc = kmalloc(sizeof(*svc), GFP_KERNEL);
+		if (svc) {
+			memcpy(svc, service, sizeof(*svc));
+			/*
+			 * Add memory barrir to make sure 'svc' is ready
+			 * before switching the pointer.
+			 */
+			smp_mb();
+
+			/*
+			 * rshim_svc[] is protected by RCU. References to it
+			 * should have rcu_read_lock() / rcu_dereference() /
+			 * rcu_read_lock().
+			 */
+			rcu_assign_pointer(rshim_svc[service->type], svc);
+
+			/* Attach the service to all backends. */
+			for (i = 0; i < rshim_nr_devs; i++) {
+				if (rshim_devs[i] != NULL) {
+					retval = svc->create(rshim_devs[i]);
+					if (retval && retval != -EEXIST)
+						break;
+				}
+			}
+		} else
+			retval = -ENOMEM;
+	} else
+		retval = -EEXIST;
+
+	rshim_unlock();
+
+	/* Deregister / cleanup the service in case of failures. */
+	if (retval && retval != -EEXIST)
+		rshim_deregister_service(service);
+
+	return retval;
+}
+EXPORT_SYMBOL(rshim_register_service);
+
+void rshim_deregister_service(struct rshim_service *service)
+{
+	int i;
+	struct rshim_service *svc = NULL;
+
+	BUG_ON(service->type >= RSH_SVC_MAX);
+
+	/*
+	 * Use synchronize_rcu() to make sure no more outstanding
+	 * references to the 'svc' pointer before releasing it.
+	 *
+	 * The reason to use RCU is that the rshim_svc pointer will be
+	 * accessed in rshim_notify() which could be called in interrupt
+	 * context and not suitable for mutex lock.
+	 */
+	rshim_lock();
+	if (rshim_svc[service->type]) {
+		svc = rshim_svc[service->type];
+
+		/* Delete the service from all backends. */
+		for (i = 0; i < rshim_nr_devs; i++)
+			if (rshim_devs[i] != NULL)
+				svc->delete(rshim_devs[i]);
+
+		rcu_assign_pointer(rshim_svc[service->type], NULL);
+	}
+	rshim_unlock();
+	if (svc != NULL) {
+		synchronize_rcu();
+
+		/* Make sure no more references to the svc pointer. */
+		while (atomic_read(&svc->ref) != 0)
+			msleep(100);
+		kfree(svc);
+	}
+}
+EXPORT_SYMBOL(rshim_deregister_service);
+
+static int __init rshim_init(void)
+{
+	int result, class_registered = 0;
+
+	/* Register our device class. */
+	rshim_class = class_create(THIS_MODULE, "rsh");
+	if (IS_ERR(rshim_class)) {
+		result = PTR_ERR(rshim_class);
+		goto error;
+	}
+	class_registered = 1;
+
+	/* Allocate major/minor numbers. */
+	result = alloc_chrdev_region(&rshim_dev_base, 0,
+				     rshim_nr_devs * RSH_DEV_TYPES,
+				     "rsh");
+	if (result < 0) {
+		pr_err("can't get rshim major\n");
+		goto error;
+	}
+
+	rshim_dev_names = kzalloc(rshim_nr_devs *
+				    sizeof(rshim_dev_names[0]), GFP_KERNEL);
+	rshim_devs = kcalloc(rshim_nr_devs, sizeof(rshim_devs[0]),
+			       GFP_KERNEL);
+
+	if (!rshim_dev_names || !rshim_devs) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	rshim_wq = create_workqueue("rshim");
+	if (!rshim_wq) {
+		result = -ENOMEM;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	if (rshim_dev_base)
+		unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+	if (class_registered)
+		class_destroy(rshim_class);
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+
+	return result;
+}
+
+static void __exit rshim_exit(void)
+{
+	int i;
+
+	flush_delayed_work(&rshim_load_modules_work);
+
+	/* Free the major/minor numbers. */
+	unregister_chrdev_region(rshim_dev_base,
+				 rshim_nr_devs * RSH_DEV_TYPES);
+
+	/* Destroy our device class. */
+	class_destroy(rshim_class);
+
+	/* Destroy our work queue. */
+	destroy_workqueue(rshim_wq);
+
+	for (i = 0; i < RSH_SVC_MAX; i++)
+		kfree(rshim_svc[i]);
+
+	for (i = 0; i < rshim_nr_devs; i++)
+		kfree(rshim_dev_names[i]);
+
+	kfree(rshim_dev_names);
+	kfree(rshim_devs);
+}
+
+module_init(rshim_init);
+module_exit(rshim_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.12");
diff --git a/drivers/soc/mellanox/host/rshim.h b/drivers/soc/mellanox/host/rshim.h
new file mode 100644
index 0000000..3ac3410
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _RSHIM_H
+#define _RSHIM_H
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+
+#include "rshim_regs.h"
+
+/* ACCESS_ONCE() wrapper. */
+#define RSHIM_READ_ONCE(x)	READ_ONCE(x)
+
+/*
+ * This forces only one reset to occur at a time.  Once we've gotten
+ * more experience with this mode we'll probably remove the #define.
+ */
+#define RSH_RESET_MUTEX		1
+
+/* Spin flag values. */
+#define RSH_SFLG_READING	0x1  /* read is active. */
+#define RSH_SFLG_WRITING	0x2  /* write_urb is active. */
+#define RSH_SFLG_CONS_OPEN	0x4  /* console stream is open. */
+
+/*
+ * Buffer/FIFO sizes.  Note that the FIFO sizes must be powers of 2; also,
+ * the read and write buffers must be no larger than the corresponding
+ * FIFOs.
+ */
+#define READ_BUF_SIZE		2048
+#define WRITE_BUF_SIZE		2048
+#define READ_FIFO_SIZE		(4 * 1024)
+#define WRITE_FIFO_SIZE		(4 * 1024)
+#define BOOT_BUF_SIZE		(16 * 1024)
+
+/* Sub-device types. */
+enum {
+	RSH_DEV_TYPE_RSHIM,
+	RSH_DEV_TYPE_BOOT,
+	RSH_DEV_TYPE_CONSOLE,
+	RSH_DEV_TYPE_NET,
+	RSH_DEV_TYPE_MISC,
+	RSH_DEV_TYPES
+};
+
+/* Event types used in rshim_notify(). */
+enum {
+	RSH_EVENT_FIFO_INPUT,		/* fifo ready for input */
+	RSH_EVENT_FIFO_OUTPUT,		/* fifo ready for output */
+	RSH_EVENT_FIFO_ERR,		/* fifo error */
+	RSH_EVENT_ATTACH,		/* backend attaching */
+	RSH_EVENT_DETACH,		/* backend detaching */
+};
+
+/* RShim service types. */
+enum {
+	RSH_SVC_NET,			/* networking service */
+	RSH_SVC_MAX
+};
+
+/* TMFIFO message header. */
+union rshim_tmfifo_msg_hdr {
+	struct {
+		u8 type;		/* message type */
+		__be16 len;		/* payload length */
+		u8 unused[5];		/* reserved, set to 0 */
+	} __packed;
+	u64 data;
+};
+
+/* TMFIFO demux channels. */
+enum {
+	TMFIFO_CONS_CHAN,	/* Console */
+	TMFIFO_NET_CHAN,	/* Network */
+	TMFIFO_MAX_CHAN		/* Number of channels */
+};
+
+/* Various rshim definitions. */
+#define RSH_INT_VEC0_RTC__SWINT3_MASK 0x8
+
+#define RSH_BYTE_ACC_READ_TRIGGER 0x50000000
+#define RSH_BYTE_ACC_SIZE 0x10000000
+#define RSH_BYTE_ACC_PENDING 0x20000000
+
+
+#define BOOT_CHANNEL        RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT
+#define RSHIM_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM
+#define UART0_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0
+#define UART1_CHANNEL       RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1
+
+#define RSH_BOOT_FIFO_SIZE   512
+
+/* FIFO structure. */
+struct rshim_fifo {
+	unsigned char *data;
+	unsigned int head;
+	unsigned int tail;
+	wait_queue_head_t operable;
+};
+
+/* RShim backend. */
+struct rshim_backend {
+	/* Device name. */
+	char *dev_name;
+
+	/* Backend owner. */
+	struct module *owner;
+
+	/* Pointer to the backend device. */
+	struct device *dev;
+
+	/* Pointer to the net device. */
+	void *net;
+
+	/* House-keeping Timer. */
+	struct timer_list timer;
+
+	/* Character device structure for each device. */
+	struct cdev cdevs[RSH_DEV_TYPES];
+
+	/*
+	 * The reference count for this structure.  This is incremented by
+	 * each open, and by the probe routine (thus, one reference for
+	 * each of the two interfaces).  It's decremented on each release,
+	 * and on each disconnect.
+	 */
+	struct kref kref;
+
+	/* State flags. */
+	u32 is_booting : 1;        /* Waiting for device to come back. */
+	u32 is_boot_open : 1;      /* Boot device is open. */
+	u32 is_tm_open : 1;        /* TM FIFO device is open. */
+	u32 is_cons_open : 1;      /* Console device is open. */
+	u32 is_in_boot_write : 1;  /* A thread is in boot_write(). */
+	u32 has_cons_work : 1;     /* Console worker thread running. */
+	u32 has_debug : 1;         /* Debug enabled for this device. */
+	u32 has_tm : 1;            /* TM FIFO found. */
+	u32 has_rshim : 1;         /* RSHIM found. */
+	u32 has_fifo_work : 1;     /* FIFO output to be done in worker. */
+	u32 has_reprobe : 1;       /* Reprobe support after SW reset. */
+	u32 drop : 1;              /* Drop the rest of the packet. */
+	u32 registered : 1;        /* Backend has been registered. */
+	u32 keepalive : 1;         /* A flag to update keepalive. */
+
+	/* Jiffies of last keepalive. */
+	u64 last_keepalive;
+
+	/* State flag bits from RSH_SFLG_xxx (see above). */
+	int spin_flags;
+
+	/* Total bytes in the read buffer. */
+	int read_buf_bytes;
+	/* Offset of next unread byte in the read buffer. */
+	int read_buf_next;
+	/* Bytes left in the current packet, or 0 if no current packet. */
+	int read_buf_pkt_rem;
+	/* Padded bytes in the read buffer. */
+	int read_buf_pkt_padding;
+
+	/* Bytes left in the current packet pending to write. */
+	int write_buf_pkt_rem;
+
+	/* Current message header. */
+	union rshim_tmfifo_msg_hdr msg_hdr;
+
+	/* Read FIFOs. */
+	struct rshim_fifo read_fifo[TMFIFO_MAX_CHAN];
+
+	/* Write FIFOs. */
+	struct rshim_fifo write_fifo[TMFIFO_MAX_CHAN];
+
+	/* Read buffer.  This is a DMA'able buffer. */
+	unsigned char *read_buf;
+	dma_addr_t read_buf_dma;
+
+	/* Write buffer.  This is a DMA'able buffer. */
+	unsigned char *write_buf;
+	dma_addr_t write_buf_dma;
+
+	/* Current Tx FIFO channel. */
+	int tx_chan;
+
+	/* Current Rx FIFO channel. */
+	int rx_chan;
+
+	/* First error encountered during read or write. */
+	int tmfifo_error;
+
+	/* Buffers used for boot writes.  Allocated at startup. */
+	char *boot_buf[2];
+
+	/*
+	 * This mutex is used to prevent the interface pointers and the
+	 * device pointer from disappearing while a driver entry point
+	 * is using them.  It's held throughout a read or write operation
+	 * (at least the parts of those operations which depend upon those
+	 * pointers) and is also held whenever those pointers are modified.
+	 * It also protects state flags, and booting_complete.
+	 */
+	struct mutex mutex;
+
+	/* We'll signal completion on this when FLG_BOOTING is turned off. */
+	struct completion booting_complete;
+
+#ifdef RSH_RESET_MUTEX
+	/* Signaled when a device is disconnected. */
+	struct completion reset_complete;
+#endif
+
+	/*
+	 * This wait queue supports fsync; it's woken up whenever an
+	 * outstanding USB write URB is done.  This will need to be more
+	 * complex if we start doing write double-buffering.
+	 */
+	wait_queue_head_t write_completed;
+
+	/* State for our outstanding boot write. */
+	struct completion boot_write_complete;
+
+	/*
+	 * This spinlock is used to protect items which must be updated by
+	 * URB completion handlers, since those can't sleep.  This includes
+	 * the read and write buffer pointers, as well as spin_flags.
+	 */
+	spinlock_t spinlock;
+
+	/* Current termios settings for the console. */
+	struct ktermios cons_termios;
+
+	/* Work queue entry. */
+	struct delayed_work	work;
+
+	/* Pending boot & fifo request for the worker. */
+	u8 *boot_work_buf;
+	u32 boot_work_buf_len;
+	u32 boot_work_buf_actual_len;
+	u8 *fifo_work_buf;
+	u32 fifo_work_buf_len;
+	int fifo_work_devtype;
+
+	/* Number of open console files. */
+	long console_opens;
+
+	/*
+	 * Our index in rshim_devs, which is also the high bits of our
+	 * minor number.
+	 */
+	int dev_index;
+
+	/* APIs provided by backend. */
+
+	/* API to write bulk data to RShim via the backend. */
+	ssize_t (*write)(struct rshim_backend *bd, int devtype,
+			 const char *buf, size_t count);
+
+	/* API to read bulk data from RShim via the backend. */
+	ssize_t (*read)(struct rshim_backend *bd, int devtype,
+			char *buf, size_t count);
+
+	/* API to cancel a read / write request (optional). */
+	void (*cancel)(struct rshim_backend *bd, int devtype, bool is_write);
+
+	/* API to destroy the backend. */
+	void (*destroy)(struct kref *kref);
+
+	/* API to read 8 bytes from RShim. */
+	int (*read_rshim)(struct rshim_backend *bd, int chan, int addr,
+			  u64 *value);
+
+	/* API to write 8 bytes to RShim. */
+	int (*write_rshim)(struct rshim_backend *bd, int chan, int addr,
+			   u64 value);
+};
+
+/* RShim service. */
+struct rshim_service {
+	/* Service type RSH_SVC_xxx. */
+	int type;
+
+	/* Reference number. */
+	atomic_t ref;
+
+	/* Create service. */
+	int (*create)(struct rshim_backend *bd);
+
+	/* Delete service. */
+	int (*delete)(struct rshim_backend *bd);
+
+	/* Notify service Rx is ready. */
+	void (*rx_notify)(struct rshim_backend *bd);
+};
+
+/* Global variables. */
+
+/* Global array to store RShim devices and names. */
+extern struct workqueue_struct *rshim_wq;
+
+/* Common APIs. */
+
+/* Register/unregister backend. */
+int rshim_register(struct rshim_backend *bd);
+void rshim_deregister(struct rshim_backend *bd);
+
+/* Register / deregister service. */
+int rshim_register_service(struct rshim_service *service);
+void rshim_deregister_service(struct rshim_service *service);
+
+/* Find backend by name. */
+struct rshim_backend *rshim_find(char *dev_name);
+
+/* RShim global lock. */
+void rshim_lock(void);
+void rshim_unlock(void);
+
+/* Event notification. */
+int rshim_notify(struct rshim_backend *bd, int event, int code);
+
+/*
+ * FIFO APIs.
+ *
+ * FIFO is demuxed into two channels, one for network interface
+ * (TMFIFO_NET_CHAN), one for console (TMFIFO_CONS_CHAN).
+ */
+
+/* Write / read some bytes to / from the FIFO via the backend. */
+ssize_t rshim_fifo_read(struct rshim_backend *bd, char *buffer,
+		      size_t count, int chan, bool nonblock,
+		      bool to_user);
+ssize_t rshim_fifo_write(struct rshim_backend *bd, const char *buffer,
+		       size_t count, int chan, bool nonblock,
+		       bool from_user);
+
+/* Alloc/free the FIFO. */
+int rshim_fifo_alloc(struct rshim_backend *bd);
+void rshim_fifo_free(struct rshim_backend *bd);
+
+/* Console APIs. */
+
+/* Enable early console. */
+int rshim_cons_early_enable(struct rshim_backend *bd);
+
+#endif /* _RSHIM_H */
diff --git a/drivers/soc/mellanox/host/rshim_regs.h b/drivers/soc/mellanox/host/rshim_regs.h
new file mode 100644
index 0000000..b14df716
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_regs.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __RSHIM_REGS_H__
+#define __RSHIM_REGS_H__
+
+#ifdef __ASSEMBLER__
+#define _64bit(x) x
+#else /* __ASSEMBLER__ */
+#define _64bit(x) x ## ULL
+#endif /* __ASSEMBLER */
+
+#include <linux/types.h>
+
+#define RSH_BOOT_FIFO_DATA 0x408
+
+#define RSH_BOOT_FIFO_COUNT 0x488
+#define RSH_BOOT_FIFO_COUNT__LENGTH 0x0001
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_SHIFT 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_WIDTH 10
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RESET_VAL 0
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_RMASK 0x3ff
+#define RSH_BOOT_FIFO_COUNT__BOOT_FIFO_COUNT_MASK  0x3ff
+
+#define RSH_BOOT_CONTROL 0x528
+#define RSH_BOOT_CONTROL__LENGTH 0x0001
+#define RSH_BOOT_CONTROL__BOOT_MODE_SHIFT 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_WIDTH 2
+#define RSH_BOOT_CONTROL__BOOT_MODE_RESET_VAL 0
+#define RSH_BOOT_CONTROL__BOOT_MODE_RMASK 0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_MASK  0x3
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_NONE 0x0
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC 0x1
+#define RSH_BOOT_CONTROL__BOOT_MODE_VAL_EMMC_LEGACY 0x3
+
+#define RSH_RESET_CONTROL 0x500
+#define RSH_RESET_CONTROL__LENGTH 0x0001
+#define RSH_RESET_CONTROL__RESET_CHIP_SHIFT 0
+#define RSH_RESET_CONTROL__RESET_CHIP_WIDTH 32
+#define RSH_RESET_CONTROL__RESET_CHIP_RESET_VAL 0
+#define RSH_RESET_CONTROL__RESET_CHIP_RMASK 0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_MASK  0xffffffff
+#define RSH_RESET_CONTROL__RESET_CHIP_VAL_KEY 0xca710001
+#define RSH_RESET_CONTROL__DISABLE_SHIFT 32
+#define RSH_RESET_CONTROL__DISABLE_WIDTH 1
+#define RSH_RESET_CONTROL__DISABLE_RESET_VAL 0
+#define RSH_RESET_CONTROL__DISABLE_RMASK 0x1
+#define RSH_RESET_CONTROL__DISABLE_MASK  _64bit(0x100000000)
+#define RSH_RESET_CONTROL__REQ_PND_SHIFT 33
+#define RSH_RESET_CONTROL__REQ_PND_WIDTH 1
+#define RSH_RESET_CONTROL__REQ_PND_RESET_VAL 0
+#define RSH_RESET_CONTROL__REQ_PND_RMASK 0x1
+#define RSH_RESET_CONTROL__REQ_PND_MASK  _64bit(0x200000000)
+
+#define RSH_SCRATCHPAD1 0xc20
+
+#define RSH_SCRATCH_BUF_CTL 0x600
+
+#define RSH_SCRATCH_BUF_DAT 0x610
+
+#define RSH_SEMAPHORE0 0x28
+
+#define RSH_SCRATCHPAD 0x20
+
+#define RSH_TM_HOST_TO_TILE_CTL 0xa30
+#define RSH_TM_HOST_TO_TILE_CTL__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__LWM_MASK  0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_SHIFT 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_WIDTH 8
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RESET_VAL 128
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_RMASK 0xff
+#define RSH_TM_HOST_TO_TILE_CTL__HWM_MASK  0xff00
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_SHIFT 32
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RESET_VAL 256
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_CTL__MAX_ENTRIES_MASK  _64bit(0x1ff00000000)
+
+#define RSH_TM_HOST_TO_TILE_STS 0xa28
+#define RSH_TM_HOST_TO_TILE_STS__LENGTH 0x0001
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_SHIFT 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_WIDTH 9
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RESET_VAL 0
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_HOST_TO_TILE_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_TILE_TO_HOST_STS 0xa48
+#define RSH_TM_TILE_TO_HOST_STS__LENGTH 0x0001
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_SHIFT 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_WIDTH 9
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RESET_VAL 0
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_RMASK 0x1ff
+#define RSH_TM_TILE_TO_HOST_STS__COUNT_MASK  0x1ff
+
+#define RSH_TM_HOST_TO_TILE_DATA 0xa20
+
+#define RSH_TM_TILE_TO_HOST_DATA 0xa40
+
+#define RSH_MMIO_ADDRESS_SPACE__LENGTH 0x10000000000
+#define RSH_MMIO_ADDRESS_SPACE__STRIDE 0x8
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_SHIFT 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_WIDTH 16
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_RMASK 0xffff
+#define RSH_MMIO_ADDRESS_SPACE__OFFSET_MASK  0xffff
+#define RSH_MMIO_ADDRESS_SPACE__PROT_SHIFT 16
+#define RSH_MMIO_ADDRESS_SPACE__PROT_WIDTH 3
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__PROT_RMASK 0x7
+#define RSH_MMIO_ADDRESS_SPACE__PROT_MASK  0x70000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_SHIFT 23
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_WIDTH 4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RESET_VAL 0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_RMASK 0xf
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_MASK  0x7800000
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_BOOT 0x0
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_RSHIM 0x1
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART0 0x2
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_UART1 0x3
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_DIAG_UART 0x4
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU 0x5
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT1 0x6
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT2 0x7
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TYU_EXT3 0x8
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER 0x9
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_USB 0xa
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_GPIO 0xb
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_MMC 0xc
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_TIMER_EXT 0xd
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_NS 0xe
+#define RSH_MMIO_ADDRESS_SPACE__CHANNEL_VAL_WDOG_SEC 0xf
+
+#define RSH_SWINT 0x318
+
+#define RSH_BYTE_ACC_CTL 0x490
+
+#define RSH_BYTE_ACC_WDAT 0x498
+
+#define RSH_BYTE_ACC_RDAT 0x4a0
+
+#define RSH_BYTE_ACC_ADDR 0x4a8
+
+#endif /* !defined(__RSHIM_REGS_H__) */
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (14 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds networking support over the Rshim interface of
the BlueField SoC. It communicates with the target (ARM) side via
the Rshim TmFifo.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |   2 +-
 drivers/soc/mellanox/host/rshim_net.c | 834 ++++++++++++++++++++++++++++++++++
 2 files changed, 835 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index e47842f..1a282b9 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o
+obj-m := rshim.o rshim_net.o
 
diff --git a/drivers/soc/mellanox/host/rshim_net.c b/drivers/soc/mellanox/host/rshim_net.c
new file mode 100644
index 0000000..6d10497
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_net.c
@@ -0,0 +1,834 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_net.c - Mellanox RShim network host driver
+ *
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/resource.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/version.h>
+#include <asm/byteorder.h>
+
+#include "rshim.h"
+
+/* Vring size. */
+#define RSH_NET_VRING_SIZE			1024
+
+/*
+ * Keepalive time in seconds. If configured, the link is considered down
+ * if no Rx activity within the configured time.
+ */
+static int rshim_net_keepalive;
+module_param(rshim_net_keepalive, int, 0644);
+MODULE_PARM_DESC(rshim_net_keepalive,
+		 "Keepalive time in seconds.");
+
+/* Use a timer for house-keeping. */
+static int rshim_net_timer_interval = HZ / 10;
+
+/* Flag to drain the current pending packet. */
+static bool rshim_net_draining_mode;
+
+/* Spin lock. */
+static DEFINE_SPINLOCK(rshim_net_spin_lock);
+
+/* Virtio ring size. */
+static int rshim_net_vring_size = RSH_NET_VRING_SIZE;
+module_param(rshim_net_vring_size, int, 0444);
+MODULE_PARM_DESC(rshim_net_vring_size, "Size of the vring.");
+
+/* Supported virtio-net features. */
+#define RSH_NET_FEATURES		((1 << VIRTIO_NET_F_MTU) | \
+					 (1 << VIRTIO_NET_F_MAC) | \
+					 (1 << VIRTIO_NET_F_STATUS))
+
+/* Default MAC. */
+static u8 rshim_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x02};
+module_param_array(rshim_net_default_mac, byte, NULL, 0);
+MODULE_PARM_DESC(rshim_net_default_mac, "default MAC address");
+
+#define VIRTIO_GET_FEATURES_RETURN_TYPE		u64
+#define VIRTIO_FINALIZE_FEATURES_RETURN_TYPE	int
+#define VIRTIO_NOTIFY_RETURN_TYPE	bool
+#define VIRTIO_NOTIFY_RETURN		{ return true; }
+
+/* MTU setting of the virtio-net interface. */
+#define RSH_NET_MTU			1500
+
+struct rshim_net;
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx);
+static void rshim_net_update_activity(struct rshim_net *net, bool activity);
+
+/* Structure to maintain the ring state. */
+struct rshim_net_vring {
+	void *va;			/* virtual address */
+	struct virtqueue *vq;		/* virtqueue pointer */
+	struct vring_desc *desc;	/* current desc */
+	struct vring_desc *desc_head;	/* current desc head */
+	int cur_len;			/* processed len in current desc */
+	int rem_len;			/* remaining length to be processed */
+	int size;			/* vring size */
+	int align;			/* vring alignment */
+	int id;				/* vring id */
+	u32 pkt_len;			/* packet total length */
+	u16 next_avail;			/* next avail desc id */
+	union rshim_tmfifo_msg_hdr hdr;	/* header of the current packet */
+	struct rshim_net *net;		/* pointer back to the rshim_net */
+};
+
+/* Event types. */
+enum {
+	RSH_NET_RX_EVENT,		/* Rx event */
+	RSH_NET_TX_EVENT		/* Tx event */
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+	RSH_NET_VRING_RX,		/* Rx ring */
+	RSH_NET_VRING_TX,		/* Tx ring */
+	RSH_NET_VRING_NUM
+};
+
+/* RShim net device structure */
+struct rshim_net {
+	struct virtio_device vdev;	/* virtual device */
+	struct mutex lock;
+	struct rshim_backend *bd;		/* backend */
+	u8 status;
+	u16 virtio_registered : 1;
+	u64 features;
+	int tx_fifo_size;		/* number of entries of the Tx FIFO */
+	int rx_fifo_size;		/* number of entries of the Rx FIFO */
+	unsigned long pend_events;	/* pending bits for deferred process */
+	struct work_struct work;	/* work struct for deferred process */
+	struct timer_list timer;	/* keepalive timer */
+	unsigned long rx_jiffies;	/* last Rx jiffies */
+	struct rshim_net_vring vrings[RSH_NET_VRING_NUM];
+	struct virtio_net_config config;	/* virtio config space */
+};
+
+/* Allocate vrings for the net device. */
+static int rshim_net_alloc_vrings(struct rshim_net *net)
+{
+	void *va;
+	int i, size;
+	struct rshim_net_vring *vring;
+	struct virtio_device *vdev = &net->vdev;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		vring->net = net;
+		vring->size = rshim_net_vring_size;
+		vring->align = SMP_CACHE_BYTES;
+		vring->id = i;
+
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		va = kzalloc(size, GFP_KERNEL);
+		if (!va) {
+			dev_err(vdev->dev.parent, "vring allocation failed\n");
+			return -EINVAL;
+		}
+
+		vring->va = va;
+	}
+
+	return 0;
+}
+
+/* Free vrings of the net device. */
+static void rshim_net_free_vrings(struct rshim_net *net)
+{
+	int i, size;
+	struct rshim_net_vring *vring;
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+		size = PAGE_ALIGN(vring_size(vring->size, vring->align));
+		if (vring->va) {
+			kfree(vring->va);
+			vring->va = NULL;
+			if (vring->vq) {
+				vring_del_virtqueue(vring->vq);
+				vring->vq = NULL;
+			}
+		}
+	}
+}
+
+/* Work handler for Rx, Tx or activity monitoring. */
+static void rshim_net_work_handler(struct work_struct *work)
+{
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(work, struct rshim_net, work);
+
+	/* Tx. */
+	if (test_and_clear_bit(RSH_NET_TX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_TX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, false);
+	}
+
+	/* Rx. */
+	if (test_and_clear_bit(RSH_NET_RX_EVENT, &net->pend_events) &&
+		       net->virtio_registered) {
+		vq = net->vrings[RSH_NET_VRING_RX].vq;
+		if (vq)
+			rshim_net_virtio_rxtx(vq, true);
+	}
+
+	/* Keepalive check. */
+	if (rshim_net_keepalive &&
+	    time_after(jiffies, net->rx_jiffies +
+		       (unsigned long)rshim_net_keepalive * HZ)) {
+		mutex_lock(&net->lock);
+		rshim_net_update_activity(net, false);
+		mutex_unlock(&net->lock);
+	}
+}
+
+/* Nothing to do for now. */
+static void rshim_net_virtio_dev_release(struct device *dev)
+{
+}
+
+/* Get the next packet descriptor from the vring. */
+static inline struct vring_desc *
+rshim_net_virtio_get_next_desc(struct virtqueue *vq)
+{
+	unsigned int idx, head;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+
+	if (vring->next_avail == vr->avail->idx)
+		return NULL;
+
+	idx = vring->next_avail % vring->size;
+	head = vr->avail->ring[idx];
+	BUG_ON(head >= vring->size);
+	vring->next_avail++;
+	return &vr->desc[head];
+}
+
+/* Get the total length of a descriptor chain. */
+static inline u32 rshim_net_virtio_get_pkt_len(struct virtio_device *vdev,
+			struct vring_desc *desc, struct vring *vr)
+{
+	u32 len = 0, idx;
+
+	while (desc) {
+		len += virtio32_to_cpu(vdev, desc->len);
+		if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+			break;
+		idx = virtio16_to_cpu(vdev, desc->next);
+		desc = &vr->desc[idx];
+	}
+
+	return len;
+}
+
+/* House-keeping timer. */
+static void rshim_net_timer(struct timer_list *arg)
+{
+	struct rshim_net *net = container_of(arg, struct rshim_net, timer);
+
+	/*
+	 * Wake up Rx handler in case Rx event is missing or any leftover
+	 * bytes are stuck in the backend.
+	 */
+	test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+
+	/*
+	 * Wake up Tx handler in case virtio has queued too many packets
+	 * and are waiting for buffer return.
+	 */
+	test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events);
+
+	schedule_work(&net->work);
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+}
+
+static void rshim_net_release_cur_desc(struct virtio_device *vdev,
+				       struct rshim_net_vring *vring)
+{
+	int idx;
+	unsigned long flags;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vring->vq);
+
+	idx = vr->used->idx % vring->size;
+	vr->used->ring[idx].id = vring->desc_head - vr->desc;
+	vr->used->ring[idx].len =
+		cpu_to_virtio32(vdev, vring->pkt_len);
+
+	/*
+	 * Virtio could poll and check the 'idx' to decide
+	 * whether the desc is done or not. Add a memory
+	 * barrier here to make sure the update above completes
+	 * before updating the idx.
+	 */
+	mb();
+	vr->used->idx++;
+
+	vring->desc = NULL;
+
+	/* Notify upper layer. */
+	spin_lock_irqsave(&rshim_net_spin_lock, flags);
+	vring_interrupt(0, vring->vq);
+	spin_unlock_irqrestore(&rshim_net_spin_lock, flags);
+}
+
+/* Update the link activity. */
+static void rshim_net_update_activity(struct rshim_net *net, bool activity)
+{
+	if (activity) {
+		/* Bring up the link. */
+		if (!(net->config.status & VIRTIO_NET_S_LINK_UP)) {
+			net->config.status |= VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+		}
+	} else {
+		/* Bring down the link. */
+		if (net->config.status & VIRTIO_NET_S_LINK_UP) {
+			int i;
+
+			net->config.status &= ~VIRTIO_NET_S_LINK_UP;
+			virtio_config_changed(&net->vdev);
+
+			/* Reset the ring state. */
+			for (i = 0; i < RSH_NET_VRING_NUM; i++) {
+				net->vrings[i].pkt_len =
+						sizeof(struct virtio_net_hdr);
+				net->vrings[i].cur_len = 0;
+				net->vrings[i].rem_len = 0;
+			}
+		}
+	}
+}
+
+/* Rx & Tx processing of a virtual queue. */
+static void rshim_net_virtio_rxtx(struct virtqueue *vq, bool is_rx)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+	struct vring *vr = (struct vring *)virtqueue_get_vring(vq);
+	struct virtio_device *vdev = &net->vdev;
+	void *addr;
+	int len, idx, seg_len;
+	struct vring_desc *desc;
+
+	mutex_lock(&net->lock);
+
+	/* Get the current pending descriptor. */
+	desc = vring->desc;
+
+	/* Don't continue if booting. */
+	if (net->bd->is_boot_open) {
+		/* Drop the pending buffer. */
+		if (desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+		mutex_unlock(&net->lock);
+		return;
+	}
+
+	while (1) {
+		if (!desc) {
+			/* Don't process new packet in draining mode. */
+			if (RSHIM_READ_ONCE(rshim_net_draining_mode))
+				break;
+
+			/* Get the head desc of next packet. */
+			vring->desc_head = rshim_net_virtio_get_next_desc(vq);
+			if (!vring->desc_head) {
+				vring->desc = NULL;
+				mutex_unlock(&net->lock);
+				return;
+			}
+			desc = vring->desc_head;
+
+			/* Packet length is unknown yet. */
+			vring->pkt_len = 0;
+			vring->rem_len = sizeof(vring->hdr);
+		}
+
+		/* Beginning of a packet. */
+		if (vring->pkt_len == 0) {
+			if (is_rx) {
+				struct virtio_net_hdr *net_hdr;
+
+				/* Read the packet header. */
+				len = rshim_fifo_read(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN, true,
+					false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update activity. */
+				net->rx_jiffies = jiffies;
+				rshim_net_update_activity(net, true);
+
+				/* Skip the length 0 packet (keepalive). */
+				if (vring->hdr.len == 0) {
+					vring->rem_len = sizeof(vring->hdr);
+					continue;
+				}
+
+				/* Update total length. */
+				vring->pkt_len = ntohs(vring->hdr.len) +
+					sizeof(struct virtio_net_hdr);
+
+				/* Initialize the packet header. */
+				net_hdr = (struct virtio_net_hdr *)
+					phys_to_virt(virtio64_to_cpu(
+					vdev, desc->addr));
+				memset(net_hdr, 0, sizeof(*net_hdr));
+			} else {
+				/* Write packet header. */
+				if (vring->rem_len == sizeof(vring->hdr)) {
+					len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+					vring->hdr.data = 0;
+					vring->hdr.type = VIRTIO_ID_NET;
+					vring->hdr.len = htons(len -
+						sizeof(struct virtio_net_hdr));
+				}
+
+				len = rshim_fifo_write(net->bd,
+					(void *)&vring->hdr +
+					sizeof(vring->hdr) - vring->rem_len,
+					vring->rem_len, TMFIFO_NET_CHAN,
+					true, false);
+				if (len > 0) {
+					vring->rem_len -= len;
+					if (vring->rem_len != 0)
+						continue;
+				} else
+					break;
+
+				/* Update total length. */
+				vring->pkt_len = rshim_net_virtio_get_pkt_len(
+							vdev, desc, vr);
+			}
+
+			vring->cur_len = sizeof(struct virtio_net_hdr);
+			vring->rem_len = vring->pkt_len;
+		}
+
+		/* Check available space in this desc. */
+		len = virtio32_to_cpu(vdev, desc->len);
+		if (len > vring->rem_len)
+			len = vring->rem_len;
+
+		/* Check whether this desc is full or completed. */
+		if (vring->cur_len == len) {
+			vring->cur_len = 0;
+			vring->rem_len -= len;
+
+			/* Get the next desc on the chain. */
+			if (vring->rem_len > 0 &&
+			    (virtio16_to_cpu(vdev, desc->flags) &
+						VRING_DESC_F_NEXT)) {
+				idx = virtio16_to_cpu(vdev, desc->next);
+				desc = &vr->desc[idx];
+				continue;
+			}
+
+			/* Done with this chain. */
+			rshim_net_release_cur_desc(vdev, vring);
+
+			/* Clear desc and go back to the loop. */
+			desc = NULL;
+
+			continue;
+		}
+
+		addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+		if (is_rx) {
+			seg_len = rshim_fifo_read(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		} else {
+			seg_len = rshim_fifo_write(net->bd,
+					addr + vring->cur_len,
+					len - vring->cur_len,
+					TMFIFO_NET_CHAN, true, false);
+		}
+		if (seg_len > 0)
+			vring->cur_len += seg_len;
+		else {
+			/* Schedule the worker to speed up Tx. */
+			if (!is_rx) {
+				if (!test_and_set_bit(RSH_NET_TX_EVENT,
+				    &net->pend_events))
+					schedule_work(&net->work);
+			}
+			break;
+		}
+	}
+
+	/* Save the current desc. */
+	vring->desc = desc;
+
+	mutex_unlock(&net->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static VIRTIO_NOTIFY_RETURN_TYPE rshim_net_virtio_notify(struct virtqueue *vq)
+{
+	struct rshim_net_vring *vring = (struct rshim_net_vring *)vq->priv;
+	struct rshim_net *net = vring->net;
+
+	/*
+	 * Virtio-net maintains vrings in pairs. Odd number ring for Rx
+	 * and even number ring for Tx.
+	 */
+	if (!(vring->id & 1)) {
+		/* Set the RX bit. */
+		if (!test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	} else {
+		/* Set the TX bit. */
+		if (!test_and_set_bit(RSH_NET_TX_EVENT, &net->pend_events))
+			schedule_work(&net->work);
+	}
+
+	VIRTIO_NOTIFY_RETURN;
+}
+
+/* Get the array of feature bits for this device. */
+static VIRTIO_GET_FEATURES_RETURN_TYPE rshim_net_virtio_get_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->features;
+}
+
+/* Confirm device features to use. */
+static VIRTIO_FINALIZE_FEATURES_RETURN_TYPE rshim_net_virtio_finalize_features(
+	struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->features = vdev->features;
+	return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void rshim_net_virtio_del_vqs(struct virtio_device *vdev)
+{
+	int i;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	for (i = 0; i < ARRAY_SIZE(net->vrings); i++) {
+		vring = &net->vrings[i];
+
+		/* Release the pending packet. */
+		if (vring->desc != NULL)
+			rshim_net_release_cur_desc(vdev, vring);
+
+		vq = vring->vq;
+		if (vq) {
+			vring->vq = NULL;
+			vring_del_virtqueue(vq);
+		}
+	}
+}
+
+/* Create and initialize the virtual queues. */
+static int rshim_net_virtio_find_vqs(struct virtio_device *vdev,
+				     unsigned int nvqs,
+				     struct virtqueue *vqs[],
+				     vq_callback_t *callbacks[],
+				     const char * const names[],
+				     const bool *ctx,
+				     struct irq_affinity *desc)
+{
+	int i, ret = -EINVAL, size;
+	struct rshim_net_vring *vring;
+	struct virtqueue *vq;
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (nvqs > ARRAY_SIZE(net->vrings))
+		return -EINVAL;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i])
+			goto error;
+		vring = &net->vrings[i];
+
+		/* zero vring */
+		size = vring_size(vring->size, vring->align);
+		memset(vring->va, 0, size);
+
+		vq = vring_new_virtqueue(
+					 i,
+					 vring->size, vring->align, vdev,
+					 false, false, vring->va,
+					 rshim_net_virtio_notify,
+					 callbacks[i], names[i]);
+		if (!vq) {
+			dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+			ret = -ENOMEM;
+			goto error;
+		}
+
+		vq->priv = vring;
+		/*
+		 * Add barrier to make sure vq is ready before assigning to
+		 * vring.
+		 */
+		mb();
+		vring->vq = vq;
+		vqs[i] = vq;
+	}
+
+	return 0;
+
+error:
+	rshim_net_virtio_del_vqs(vdev);
+	return ret;
+}
+
+/* Read the status byte. */
+static u8 rshim_net_virtio_get_status(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	return net->status;
+}
+
+/* Write the status byte. */
+static void rshim_net_virtio_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void rshim_net_virtio_reset(struct virtio_device *vdev)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	net->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void rshim_net_virtio_get(struct virtio_device *vdev,
+				 unsigned int offset,
+				 void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy(buf, (u8 *)&net->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void rshim_net_virtio_set(struct virtio_device *vdev,
+				 unsigned int offset,
+				 const void *buf,
+				 unsigned int len)
+{
+	struct rshim_net *net = container_of(vdev, struct rshim_net, vdev);
+
+	if (offset + len > sizeof(net->config) || offset + len < len) {
+		dev_err(vdev->dev.parent, "virtio_get access out of bounds\n");
+		return;
+	}
+
+	memcpy((u8 *)&net->config + offset, buf, len);
+}
+
+/* Virtio config operations. */
+static struct virtio_config_ops rshim_net_virtio_config_ops = {
+	.get_features = rshim_net_virtio_get_features,
+	.finalize_features = rshim_net_virtio_finalize_features,
+	.find_vqs = rshim_net_virtio_find_vqs,
+	.del_vqs = rshim_net_virtio_del_vqs,
+	.reset = rshim_net_virtio_reset,
+	.set_status = rshim_net_virtio_set_status,
+	.get_status = rshim_net_virtio_get_status,
+	.get = rshim_net_virtio_get,
+	.set = rshim_net_virtio_set,
+};
+
+/* Remove. */
+static int rshim_net_delete_dev(struct rshim_net *net)
+{
+	if (net) {
+		/* Stop the timer. */
+		del_timer_sync(&net->timer);
+
+		/* Cancel the pending work. */
+		cancel_work_sync(&net->work);
+
+		/* Unregister virtio. */
+		if (net->virtio_registered)
+			unregister_virtio_device(&net->vdev);
+
+		/* Free vring. */
+		rshim_net_free_vrings(net);
+
+		kfree(net);
+	}
+
+	return 0;
+}
+
+/* Rx ready. */
+void rshim_net_rx_notify(struct rshim_backend *bd)
+{
+	struct rshim_net *net = (struct rshim_net *)bd->net;
+
+	if (net) {
+		test_and_set_bit(RSH_NET_RX_EVENT, &net->pend_events);
+		schedule_work(&net->work);
+	}
+}
+
+/* Remove. */
+int rshim_net_delete(struct rshim_backend *bd)
+{
+	int ret = 0;
+
+	if (bd->net) {
+		ret = rshim_net_delete_dev((struct rshim_net *)bd->net);
+		bd->net = NULL;
+	}
+
+	return ret;
+}
+
+/* Init. */
+int rshim_net_create(struct rshim_backend *bd)
+{
+	struct rshim_net *net;
+	struct virtio_device *vdev;
+	int ret = -ENOMEM;
+
+	if (bd->net)
+		return -EEXIST;
+
+	net = kzalloc(sizeof(struct rshim_net), GFP_KERNEL);
+	if (!net)
+		return ret;
+
+	INIT_WORK(&net->work, rshim_net_work_handler);
+
+	timer_setup(&net->timer, rshim_net_timer, 0);
+	net->timer.function = rshim_net_timer;
+
+	net->features = RSH_NET_FEATURES;
+	net->config.mtu = RSH_NET_MTU;
+	memcpy(net->config.mac, rshim_net_default_mac,
+	       sizeof(rshim_net_default_mac));
+	/* Set MAC address to be unique even number. */
+	net->config.mac[5] += bd->dev_index * 2;
+
+	mutex_init(&net->lock);
+
+	vdev = &net->vdev;
+	vdev->id.device = VIRTIO_ID_NET;
+	vdev->config = &rshim_net_virtio_config_ops;
+	vdev->dev.parent = bd->dev;
+	vdev->dev.release = rshim_net_virtio_dev_release;
+	if (rshim_net_alloc_vrings(net))
+		goto err;
+
+	/* Register the virtio device. */
+	ret = register_virtio_device(vdev);
+	if (ret) {
+		dev_err(bd->dev, "register_virtio_device() failed\n");
+		goto err;
+	}
+	net->virtio_registered = 1;
+
+	mod_timer(&net->timer, jiffies + rshim_net_timer_interval);
+
+	net->bd = bd;
+	/* Add a barrier to keep the order of the two pointer assignments. */
+	mb();
+	bd->net = net;
+
+	/* Bring up the interface. */
+	mutex_lock(&net->lock);
+	rshim_net_update_activity(net, true);
+	mutex_unlock(&net->lock);
+
+	return 0;
+
+err:
+	rshim_net_delete_dev(net);
+	return ret;
+}
+
+struct rshim_service rshim_svc = {
+	.type = RSH_SVC_NET,
+	.create = rshim_net_create,
+	.delete = rshim_net_delete,
+	.rx_notify = rshim_net_rx_notify
+};
+
+static int __init rshim_net_init(void)
+{
+	return rshim_register_service(&rshim_svc);
+}
+
+static void __exit rshim_net_exit(void)
+{
+	/*
+	 * Wait 200ms, which should be good enough to drain the current
+	 * pending packet.
+	 */
+	rshim_net_draining_mode = true;
+	msleep(200);
+
+	return rshim_deregister_service(&rshim_svc);
+}
+
+module_init(rshim_net_init);
+module_exit(rshim_net_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.7");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (15 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the USB backend driver to access the Rshim
interface on the BlueField SoC. It can be used when a USB cable
is connected to the Smart NIC or standalone device.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile    |    2 +-
 drivers/soc/mellanox/host/rshim_usb.c | 1035 +++++++++++++++++++++++++++++++++
 2 files changed, 1036 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index 1a282b9..c6703cd 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o
+obj-m := rshim.o rshim_net.o rshim_usb.o
 
diff --git a/drivers/soc/mellanox/host/rshim_usb.c b/drivers/soc/mellanox/host/rshim_usb.c
new file mode 100644
index 0000000..aad6250
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_usb.c
@@ -0,0 +1,1035 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_usb.c - Mellanox RShim USB host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/*
+ * This source code was originally derived from:
+ *
+ *   USB Skeleton driver - 2.0
+ *
+ *   Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License as
+ *	published by the Free Software Foundation, version 2.
+ *
+ * Some code was also lifted from the example drivers in "Linux Device
+ * Drivers" by Alessandro Rubini and Jonathan Corbet, published by
+ * O'Reilly & Associates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/workqueue.h>
+#include <asm/termbits.h>
+#include <linux/circ_buf.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/* Our USB vendor/product IDs. */
+#define USB_TILERA_VENDOR_ID	0x22dc	 /* Tilera Corporation */
+#define USB_BLUEFIELD_PRODUCT_ID	0x0004	 /* Mellanox Bluefield */
+
+/* Number of retries for the tmfifo read/write path. */
+#define READ_RETRIES		5
+#define WRITE_RETRIES		5
+
+/* Structure to hold all of our device specific stuff. */
+struct rshim_usb {
+	/* RShim backend structure. */
+	struct rshim_backend bd;
+
+	/*
+	 * The USB device for this device.  We bump its reference count
+	 * when the first interface is probed, and drop the ref when the
+	 * last interface is disconnected.
+	 */
+	struct usb_device *udev;
+
+	/* The USB interfaces for this device. */
+	struct usb_interface *rshim_interface;
+
+	/* State for our outstanding boot write. */
+	struct urb *boot_urb;
+
+	/* Control data. */
+	u64 ctrl_data;
+
+	/* Interrupt data buffer.  This is a USB DMA'able buffer. */
+	u64 *intr_buf;
+	dma_addr_t intr_buf_dma;
+
+	/* Read/interrupt urb, retries, and mode. */
+	struct urb *read_or_intr_urb;
+	int read_or_intr_retries;
+	int read_urb_is_intr;
+
+	/* Write urb and retries. */
+	struct urb *write_urb;
+	int write_retries;
+
+	/* The address of the boot FIFO endpoint. */
+	u8 boot_fifo_ep;
+	/* The address of the tile-monitor FIFO interrupt endpoint. */
+	u8 tm_fifo_int_ep;
+	/* The address of the tile-monitor FIFO input endpoint. */
+	u8 tm_fifo_in_ep;
+	/* The address of the tile-monitor FIFO output endpoint. */
+	u8 tm_fifo_out_ep;
+};
+
+/* Table of devices that work with this driver */
+static struct usb_device_id rshim_usb_table[] = {
+	{ USB_DEVICE(USB_TILERA_VENDOR_ID, USB_BLUEFIELD_PRODUCT_ID) },
+	{ }					/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(usb, rshim_usb_table);
+
+/* Random compatibility hacks. */
+
+/* Arguments to an urb completion handler. */
+#define URB_COMP_ARGS struct urb *urb
+
+static void rshim_usb_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_usb *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_usb, bd);
+
+	rshim_deregister(bd);
+	kfree(dev);
+}
+
+/* Rshim read/write routines */
+
+static int rshim_usb_read_rshim(struct rshim_backend *bd, int chan, int addr,
+			      u64 *result)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Do a blocking control read and endian conversion. */
+	retval = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_IN,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	/*
+	 * The RShim HW puts bytes on the wire in little-endian order
+	 * regardless of endianness settings either in the host or the ARM
+	 * cores.
+	 */
+	*result = le64_to_cpu(dev->ctrl_data);
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * reads.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+static int rshim_usb_write_rshim(struct rshim_backend *bd, int chan, int addr,
+			       u64 value)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/* Convert the word to little endian and do blocking control write. */
+	dev->ctrl_data = cpu_to_le64(value);
+	retval = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
+				 0,  /* request */
+				 USB_RECIP_ENDPOINT | USB_TYPE_VENDOR |
+				 USB_DIR_OUT,  /* request type */
+				 chan, /* value */
+				 addr, /* index */
+				 &dev->ctrl_data, 8, 2000);
+
+	if (retval == 8)
+		return 0;
+
+	/*
+	 * These are weird error codes, but we want to use something
+	 * the USB stack doesn't use so that we can identify short/long
+	 * writes.
+	 */
+	return retval >= 0 ? (retval > 8 ? -EBADE : -EBADR) : retval;
+}
+
+/* Boot routines */
+
+static void rshim_usb_boot_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+
+	if (urb->status == -ENOENT)
+		pr_debug("boot tx canceled, actual length %d\n",
+			 urb->actual_length);
+	else if (urb->status)
+		pr_debug("boot tx failed, status %d, actual length %d\n",
+			 urb->status, urb->actual_length);
+
+	complete_all(&dev->bd.boot_write_complete);
+}
+
+static ssize_t rshim_usb_boot_write(struct rshim_usb *dev, const char *buf,
+				  size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval = 0;
+	size_t bytes_written = 0;
+
+	/* Create and fill an urb */
+	dev->boot_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (unlikely(!dev->boot_urb)) {
+		pr_debug("boot_write: couldn't allocate urb\n");
+		return -ENOMEM;
+	}
+	usb_fill_bulk_urb(dev->boot_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev, dev->boot_fifo_ep),
+			  (char *)buf, count, rshim_usb_boot_write_callback,
+			  dev);
+
+	/* Submit the urb. */
+	reinit_completion(&bd->boot_write_complete);
+	retval = usb_submit_urb(dev->boot_urb, GFP_KERNEL);
+	if (retval)
+		goto done;
+
+	/*
+	 * Wait until it's done. If anything goes wrong in the USB layer,
+	 * the callback function might never get called and cause stuck.
+	 * Here we release the mutex so user could use 'ctrl + c' to terminate
+	 * the current write. Once the boot file is opened again, the
+	 * outstanding urb will be canceled.
+	 *
+	 * Note: when boot stream starts to write, it will either run to
+	 * completion, or be interrupted by user. The urb callback function will
+	 * be called during this period. There are no other operations to affect
+	 * the boot stream. So unlocking the mutex is considered safe.
+	 */
+	mutex_unlock(&bd->mutex);
+	retval = wait_for_completion_interruptible(&bd->boot_write_complete);
+	mutex_lock(&bd->mutex);
+	if (retval) {
+		usb_kill_urb(dev->boot_urb);
+		bytes_written += dev->boot_urb->actual_length;
+		goto done;
+	}
+
+	if (dev->boot_urb->actual_length !=
+		dev->boot_urb->transfer_buffer_length) {
+		pr_debug("length mismatch, exp %d act %d stat %d\n",
+			 dev->boot_urb->transfer_buffer_length,
+			 dev->boot_urb->actual_length,
+			 dev->boot_urb->status);
+	}
+
+#ifdef RSH_USB_BMC
+	/*
+	 * The UHCI host controller on the BMC seems to
+	 * overestimate the amount of data it's
+	 * successfully sent when it sees a babble error.
+	 */
+	if (dev->boot_urb->status == -EOVERFLOW &&
+	    dev->boot_urb->actual_length >= 64) {
+		dev->boot_urb->actual_length -= 64;
+		pr_debug("saw babble, new length %d\n",
+		dev->boot_urb->actual_length);
+	}
+#endif
+
+	bytes_written = dev->boot_urb->actual_length;
+
+	if (dev->boot_urb->status == -ENOENT &&
+	    dev->boot_urb->transfer_buffer_length !=
+	    dev->boot_urb->actual_length) {
+		pr_debug("boot_write: urb canceled.\n");
+	} else {
+		if (dev->boot_urb->status) {
+			pr_debug("boot_write: urb failed, status %d\n",
+				 dev->boot_urb->status);
+		}
+		if (dev->boot_urb->status != -ENOENT && !retval)
+			retval = dev->boot_urb->status;
+	}
+
+done:
+	usb_free_urb(dev->boot_urb);
+	dev->boot_urb = NULL;
+
+	return bytes_written ? bytes_written : retval;
+}
+
+/* FIFO routines */
+
+static void rshim_usb_fifo_read_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("usb_fifo_read_callback: %s urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 dev->read_urb_is_intr ? "interrupt" : "read",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_READING;
+
+	if (urb->status == 0) {
+		/*
+		 * If a read completed, clear the number of bytes available
+		 * from the last interrupt, and set up the new buffer for
+		 * processing.  (If an interrupt completed, there's nothing
+		 * to do, since the number of bytes available was already
+		 * set by the I/O itself.)
+		 */
+		if (!dev->read_urb_is_intr) {
+			*dev->intr_buf = 0;
+			bd->read_buf_bytes = urb->actual_length;
+			bd->read_buf_next = 0;
+		}
+
+		/* Process any data we got, and launch another I/O if needed. */
+		rshim_notify(bd, RSH_EVENT_FIFO_INPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->read_or_intr_retries < READ_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial reads; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->read_or_intr_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_debug("fifo_read_callback: resubmitted urb but got error %d",
+				 retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_READING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_read_callback: %s urb completed abnormally, "
+		       "error %d\n",
+		       dev->read_urb_is_intr ? "interrupt" : "read",
+		       urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+static void rshim_usb_fifo_read(struct rshim_usb *dev, char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+
+	if ((int) *dev->intr_buf || bd->read_buf_bytes) {
+		/* We're doing a read. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_bulk_urb(urb, dev->udev,
+				  usb_rcvbulkpipe(dev->udev,
+						  dev->tm_fifo_in_ep),
+				  buffer, count,
+				  rshim_usb_fifo_read_callback,
+				  dev);
+		urb->transfer_dma = dev->bd.read_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 0;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb. */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_drain: failed submitting read "
+			      "urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted read urb\n");
+	} else {
+		/* We're doing an interrupt. */
+
+		int retval;
+		struct urb *urb = dev->read_or_intr_urb;
+
+		usb_fill_int_urb(urb, dev->udev,
+				 usb_rcvintpipe(dev->udev, dev->tm_fifo_int_ep),
+				 dev->intr_buf, sizeof(*dev->intr_buf),
+				 rshim_usb_fifo_read_callback,
+				 /*
+				  * FIXME: is 6 a good interval value?  That's
+				  * polling at 8000/(1 << 6) == 125 Hz.
+				  */
+				 dev, 6);
+		urb->transfer_dma = dev->intr_buf_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dev->bd.spin_flags |= RSH_SFLG_READING;
+		dev->read_urb_is_intr = 1;
+		dev->read_or_intr_retries = 0;
+
+		/* Submit the urb */
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			dev->bd.spin_flags &= ~RSH_SFLG_READING;
+			pr_debug("fifo_read_callback: failed submitting "
+			      "interrupt urb, error %d", retval);
+		}
+		pr_debug("fifo_read_callback: resubmitted interrupt urb\n");
+	}
+}
+
+static void rshim_usb_fifo_write_callback(URB_COMP_ARGS)
+{
+	struct rshim_usb *dev = urb->context;
+	struct rshim_backend *bd = &dev->bd;
+
+	spin_lock(&bd->spinlock);
+
+	pr_debug("fifo_write_callback: urb completed, status %d, "
+		 "actual length %d, intr buf %d\n",
+		 urb->status, urb->actual_length, (int) *dev->intr_buf);
+
+	bd->spin_flags &= ~RSH_SFLG_WRITING;
+
+	if (urb->status == 0) {
+		/* A write completed. */
+		wake_up_interruptible_all(&bd->write_completed);
+		rshim_notify(bd, RSH_EVENT_FIFO_OUTPUT, 0);
+	} else if (urb->status == -ENOENT) {
+		/*
+		 * The urb was explicitly cancelled.  The only time we
+		 * currently do this is when we close the stream.  If we
+		 * mark this as an error, tile-monitor --resume won't work,
+		 * so we just want to do nothing.
+		 */
+	} else if (urb->status == -ECONNRESET ||
+		   urb->status == -ESHUTDOWN) {
+		/*
+		 * The device went away.  We don't want to retry this, and
+		 * we expect things to get better, probably after a device
+		 * reset, but in the meantime, we should let upper layers
+		 * know there was a problem.
+		 */
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	} else if (dev->write_retries < WRITE_RETRIES &&
+		   urb->actual_length == 0 &&
+		   (urb->status == -EPROTO || urb->status == -EILSEQ ||
+		    urb->status == -EOVERFLOW)) {
+		/*
+		 * We got an error which could benefit from being retried.
+		 * Just submit the same urb again.  Note that we don't
+		 * handle partial writes; it's hard, and we haven't really
+		 * seen them.
+		 */
+		int retval;
+
+		dev->write_retries++;
+		retval = usb_submit_urb(urb, GFP_ATOMIC);
+		if (retval) {
+			pr_err("fifo_write_callback: resubmitted urb but "
+			       "got error %d\n", retval);
+			/*
+			 * In this case, we won't try again; signal the
+			 * error to upper layers.
+			 */
+			rshim_notify(bd, RSH_EVENT_FIFO_ERR, retval);
+		} else {
+			bd->spin_flags |= RSH_SFLG_WRITING;
+		}
+	} else {
+		/*
+		 * We got some error we don't know how to handle, or we got
+		 * too many errors.  Either way we don't retry any more,
+		 * but we signal the error to upper layers.
+		 */
+		pr_err("fifo_write_callback: urb completed abnormally, "
+		       "error %d\n", urb->status);
+		rshim_notify(bd, RSH_EVENT_FIFO_ERR, urb->status);
+	}
+
+	spin_unlock(&bd->spinlock);
+}
+
+static int rshim_usb_fifo_write(struct rshim_usb *dev, const char *buffer,
+			      size_t count)
+{
+	struct rshim_backend *bd = &dev->bd;
+	int retval;
+
+	WARN_ONCE(count % 8 != 0, "rshim write %d is not multiple of 8 bytes\n",
+		  (int)count);
+
+	/* Initialize the urb properly. */
+	usb_fill_bulk_urb(dev->write_urb, dev->udev,
+			  usb_sndbulkpipe(dev->udev,
+					  dev->tm_fifo_out_ep),
+			  (char *)buffer,
+			  count,
+			  rshim_usb_fifo_write_callback,
+			  dev);
+	dev->write_urb->transfer_dma = bd->write_buf_dma;
+	dev->write_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	dev->write_retries = 0;
+
+	/* Send the data out the bulk port. */
+	retval = usb_submit_urb(dev->write_urb, GFP_ATOMIC);
+	if (retval) {
+		bd->spin_flags &= ~RSH_SFLG_WRITING;
+		pr_err("fifo_write: failed submitting write "
+		       "urb, error %d\n", retval);
+		return -1;
+	}
+
+	bd->spin_flags |= RSH_SFLG_WRITING;
+	return 0;
+}
+
+/* Probe routines */
+
+/* These make the endpoint test code in rshim_usb_probe() a lot cleaner. */
+#define is_in_ep(ep)   (((ep)->bEndpointAddress & USB_ENDPOINT_DIR_MASK) == \
+			USB_DIR_IN)
+#define is_bulk_ep(ep) (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_BULK)
+#define is_int_ep(ep)  (((ep)->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == \
+			USB_ENDPOINT_XFER_INT)
+#define max_pkt(ep)    le16_to_cpu(ep->wMaxPacketSize)
+#define ep_addr(ep)    (ep->bEndpointAddress)
+
+static ssize_t rshim_usb_backend_read(struct rshim_backend *bd, int devtype,
+				    char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		rshim_usb_fifo_read(dev, buf, count);
+		return 0;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+static ssize_t rshim_usb_backend_write(struct rshim_backend *bd, int devtype,
+				     const char *buf, size_t count)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		return rshim_usb_fifo_write(dev, buf, count);
+
+	case RSH_DEV_TYPE_BOOT:
+		return rshim_usb_boot_write(dev, buf, count);
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		return -EINVAL;
+	}
+}
+
+static void rshim_usb_backend_cancel_req(struct rshim_backend *bd, int devtype,
+				       bool is_write)
+{
+	struct rshim_usb *dev = container_of(bd, struct rshim_usb, bd);
+
+	switch (devtype) {
+	case RSH_DEV_TYPE_NET:
+	case RSH_DEV_TYPE_CONSOLE:
+		if (is_write)
+			usb_kill_urb(dev->write_urb);
+		else
+			usb_kill_urb(dev->read_or_intr_urb);
+		break;
+
+	case RSH_DEV_TYPE_BOOT:
+		usb_kill_urb(dev->boot_urb);
+		break;
+
+	default:
+		pr_err("bad devtype %d\n", devtype);
+		break;
+	}
+}
+
+static int rshim_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	char *usb_dev_name;
+	int dev_name_len = 32;
+	struct rshim_usb *dev = NULL;
+	struct rshim_backend *bd;
+	struct usb_host_interface *iface_desc;
+	struct usb_endpoint_descriptor *ep;
+	int i;
+	int allocfail = 0;
+	int retval = -ENOMEM;
+
+	/*
+	 * Get our device pathname.  The usb_make_path interface uselessly
+	 * returns -1 if the output buffer is too small, instead of telling
+	 * us how big it needs to be, so we just start with a reasonable
+	 * size and double it until the name fits.
+	 */
+	while (1) {
+		usb_dev_name = kmalloc(dev_name_len, GFP_KERNEL);
+		if (!usb_dev_name)
+			goto error;
+		if (usb_make_path(interface_to_usbdev(interface), usb_dev_name,
+				  dev_name_len) >= 0)
+			break;
+		kfree(usb_dev_name);
+		dev_name_len *= 2;
+	}
+
+	pr_debug("probing %s\n", usb_dev_name);
+
+	/*
+	 * Now see if we've previously seen this device.  If so, we use the
+	 * same device number, otherwise we pick the first available one.
+	 */
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(usb_dev_name);
+	if (bd) {
+		pr_debug("found previously allocated rshim_usb structure\n");
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_usb, bd);
+		kfree(usb_dev_name);
+		usb_dev_name = NULL;
+	} else {
+		pr_debug("creating new rshim_usb structure\n");
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			pr_err("couldn't get memory for new device\n");
+			rshim_unlock();
+			goto error;
+		}
+
+		bd = &dev->bd;
+		bd->dev_name = usb_dev_name;
+		bd->read = rshim_usb_backend_read;
+		bd->write = rshim_usb_backend_write;
+		bd->cancel = rshim_usb_backend_cancel_req;
+		bd->destroy = rshim_usb_delete;
+		bd->read_rshim = rshim_usb_read_rshim;
+		bd->write_rshim = rshim_usb_write_rshim;
+		bd->has_reprobe = 1;
+		bd->owner = THIS_MODULE;
+		mutex_init(&bd->mutex);
+	}
+
+	/*
+	 * This has to be done on the first probe, whether or not we
+	 * allocated a new rshim_usb structure, since it's always dropped
+	 * on the second disconnect.
+	 */
+	if (!bd->has_rshim && !bd->has_tm)
+		dev->udev = usb_get_dev(interface_to_usbdev(interface));
+
+	/*
+	 * It would seem more logical to allocate these above when we create
+	 * a new rshim_usb structure, but we don't want to do it until we've
+	 * upped the usb device reference count.
+	 */
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf)
+		bd->read_buf = usb_alloc_coherent(dev->udev, READ_BUF_SIZE,
+						   GFP_KERNEL,
+						   &bd->read_buf_dma);
+	allocfail |= bd->read_buf == 0;
+
+	if (!dev->intr_buf) {
+		dev->intr_buf = usb_alloc_coherent(dev->udev,
+						   sizeof(*dev->intr_buf),
+						   GFP_KERNEL,
+						   &dev->intr_buf_dma);
+		if (dev->intr_buf != NULL)
+			*dev->intr_buf = 0;
+	}
+	allocfail |= dev->intr_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = usb_alloc_coherent(dev->udev,
+						       WRITE_BUF_SIZE,
+						       GFP_KERNEL,
+						       &bd->write_buf_dma);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (!dev->read_or_intr_urb)
+		dev->read_or_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->read_or_intr_urb == 0;
+
+	if (!dev->write_urb)
+		dev->write_urb = usb_alloc_urb(0, GFP_KERNEL);
+	allocfail |= dev->write_urb == 0;
+
+	if (allocfail) {
+		pr_err("can't allocate buffers or urbs\n");
+		rshim_unlock();
+		goto error;
+	}
+
+	rshim_unlock();
+
+	iface_desc = interface->cur_altsetting;
+
+	/* Make sure this is a vendor-specific interface class. */
+	if (iface_desc->desc.bInterfaceClass != 0xFF)
+		goto error;
+
+	/* See which interface this is, then save the correct data. */
+
+	mutex_lock(&bd->mutex);
+	if (iface_desc->desc.bInterfaceSubClass == 0) {
+		pr_debug("found rshim interface\n");
+		/*
+		 * We only expect one endpoint here, just make sure its
+		 * attributes match.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 1) {
+			pr_err("wrong number of endpoints for rshim "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		ep = &iface_desc->endpoint[0].desc;
+
+		/* We expect a bulk out endpoint. */
+		if (!is_bulk_ep(ep) || is_in_ep(ep)) {
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+
+		bd->has_rshim = 1;
+		dev->rshim_interface = interface;
+		dev->boot_fifo_ep = ep_addr(ep);
+
+	} else if (iface_desc->desc.bInterfaceSubClass == 1) {
+		pr_debug("found tmfifo interface\n");
+		/*
+		 * We expect 3 endpoints here.  Since they're listed in
+		 * random order we have to use their attributes to figure
+		 * out which is which.
+		 */
+		if (iface_desc->desc.bNumEndpoints != 3) {
+			pr_err("wrong number of endpoints for tm "
+			       "interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		dev->tm_fifo_in_ep = 0;
+		dev->tm_fifo_int_ep = 0;
+		dev->tm_fifo_out_ep = 0;
+
+		for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) {
+			ep = &iface_desc->endpoint[i].desc;
+
+			if (is_in_ep(ep)) {
+				if (is_bulk_ep(ep)) {
+					/* Bulk in endpoint. */
+					dev->tm_fifo_in_ep = ep_addr(ep);
+				} else if (is_int_ep(ep)) {
+					/* Interrupt in endpoint. */
+					dev->tm_fifo_int_ep = ep_addr(ep);
+				}
+			} else {
+				if (is_bulk_ep(ep)) {
+					/* Bulk out endpoint. */
+					dev->tm_fifo_out_ep = ep_addr(ep);
+				}
+			}
+		}
+
+		if (!dev->tm_fifo_in_ep || !dev->tm_fifo_int_ep ||
+		    !dev->tm_fifo_out_ep) {
+			pr_err("could not find all required endpoints for "
+			       "tm interface\n");
+			mutex_unlock(&bd->mutex);
+			goto error;
+		}
+		bd->has_tm = 1;
+	} else {
+		mutex_unlock(&bd->mutex);
+		goto error;
+	}
+
+	/* Save our data pointer in this interface device. */
+	usb_set_intfdata(interface, dev);
+
+	if (!bd->dev)
+		bd->dev = &dev->udev->dev;
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto error;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that device is attached. */
+	retval = rshim_notify(&dev->bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&dev->bd.mutex);
+	if (retval)
+		goto error;
+
+	return 0;
+
+error:
+	if (dev) {
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  dev->bd.read_buf, dev->bd.read_buf_dma);
+		dev->bd.read_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  dev->bd.write_buf, dev->bd.write_buf_dma);
+		dev->bd.write_buf = NULL;
+
+		rshim_fifo_free(&dev->bd);
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		rshim_lock();
+		kref_put(&dev->bd.kref, rshim_usb_delete);
+		rshim_unlock();
+	}
+
+	kfree(usb_dev_name);
+	return retval;
+}
+
+static void rshim_usb_disconnect(struct usb_interface *interface)
+{
+	struct rshim_usb *dev;
+	struct rshim_backend *bd;
+	int flush_wq = 0;
+
+	dev = usb_get_intfdata(interface);
+	bd = &dev->bd;
+	usb_set_intfdata(interface, NULL);
+
+	rshim_notify(bd, RSH_EVENT_DETACH, 0);
+
+	/*
+	 * Clear this interface so we don't unregister our devices next
+	 * time.
+	 */
+	mutex_lock(&bd->mutex);
+
+	if (dev->rshim_interface == interface) {
+		bd->has_rshim = 0;
+		dev->rshim_interface = NULL;
+	} else {
+		/*
+		 * We have to get rid of any USB state, since it may be
+		 * tied to the USB device which is going to vanish as soon
+		 * as we get both disconnects.  We'll reallocate these
+		 * on the next probe.
+		 *
+		 * Supposedly the code which called us already killed any
+		 * outstanding URBs, but it doesn't hurt to be sure.
+		 */
+
+		/*
+		 * We must make sure the console worker isn't running
+		 * before we free all these resources, and particularly
+		 * before we decrement our usage count, below.  Most of the
+		 * time, if it's even enabled, it'll be scheduled to run at
+		 * some point in the future, and we can take care of that
+		 * by asking that it be canceled.
+		 *
+		 * However, it's possible that it's already started
+		 * running, but can't make progress because it's waiting
+		 * for the device mutex, which we currently have.  We
+		 * handle this case by clearing the bit that says it's
+		 * enabled.  The worker tests this bit as soon as it gets
+		 * the mutex, and if it's clear, it just returns without
+		 * rescheduling itself.  Note that if we didn't
+		 * successfully cancel it, we flush the work entry below,
+		 * after we drop the mutex, to be sure it's done before we
+		 * decrement the device usage count.
+		 *
+		 * XXX This might be racy; what if something else which
+		 * would enable the worker runs after we drop the mutex
+		 * but before the worker itself runs?
+		 */
+		flush_wq = !cancel_delayed_work(&bd->work);
+		bd->has_cons_work = 0;
+
+		usb_kill_urb(dev->read_or_intr_urb);
+		usb_free_urb(dev->read_or_intr_urb);
+		dev->read_or_intr_urb = NULL;
+		usb_kill_urb(dev->write_urb);
+		usb_free_urb(dev->write_urb);
+		dev->write_urb = NULL;
+
+		usb_free_coherent(dev->udev, READ_BUF_SIZE,
+				  bd->read_buf, bd->read_buf_dma);
+		bd->read_buf = NULL;
+
+		usb_free_coherent(dev->udev, sizeof(*dev->intr_buf),
+				  dev->intr_buf, dev->intr_buf_dma);
+		dev->intr_buf = NULL;
+
+		usb_free_coherent(dev->udev, WRITE_BUF_SIZE,
+				  bd->write_buf, bd->write_buf_dma);
+		bd->write_buf = NULL;
+
+		rshim_fifo_free(bd);
+	}
+
+	if (!bd->has_rshim && !bd->has_tm) {
+		usb_put_dev(dev->udev);
+		dev->udev = NULL;
+		pr_info("now disconnected\n");
+	} else {
+		pr_debug("partially disconnected\n");
+	}
+
+	mutex_unlock(&bd->mutex);
+
+	/* This can't be done while we hold the mutex; see comments above. */
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+
+	/* decrement our usage count */
+	rshim_lock();
+	kref_put(&bd->kref, rshim_usb_delete);
+	rshim_unlock();
+}
+
+static struct usb_driver rshim_usb_driver = {
+	.name = "rshim_usb",
+	.probe = rshim_usb_probe,
+	.disconnect = rshim_usb_disconnect,
+	.id_table = rshim_usb_table,
+};
+
+static int __init rshim_usb_init(void)
+{
+	int result;
+
+	/* Register this driver with the USB subsystem. */
+	result = usb_register(&rshim_usb_driver);
+	if (result)
+		pr_err("usb_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_usb_exit(void)
+{
+	/* Deregister this driver with the USB subsystem. */
+	usb_deregister(&rshim_usb_driver);
+}
+
+module_init(rshim_usb_init);
+module_exit(rshim_usb_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe backend driver
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (16 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-03 19:17 ` [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
  2019-01-21 19:17 ` [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver Liming Sun
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe backend driver to access the Rshim
interface on the BlueField SoC, such as on the Smart NIC.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile     |   2 +-
 drivers/soc/mellanox/host/rshim_pcie.c | 478 +++++++++++++++++++++++++++++++++
 2 files changed, 479 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index c6703cd..fa4b21c 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie.c b/drivers/soc/mellanox/host/rshim_pcie.c
new file mode 100644
index 0000000..3fa7bd9
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie.c - Mellanox RShim PCIe host driver
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0xc2d2
+
+/** The offset in BAR2 of the RShim region. */
+#define PCI_RSHIM_WINDOW_OFFSET					0x0
+
+/** The size the RShim region. */
+#define PCI_RSHIM_WINDOW_SIZE					0x100000
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* RShim BAR size. */
+	uint64_t bar0_size;
+
+	/* Address of the RShim registers. */
+	u8 __iomem *rshim_regs;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+#ifndef CONFIG_64BIT
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct rshim_pcie *dev, int chan)
+{
+	u32 read_value;
+
+	do {
+		read_value = readl(dev->rshim_regs +
+			(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+		if (signal_pending(current))
+			return -EINTR;
+
+	} while (read_value & RSH_BYTE_ACC_PENDING);
+
+	return 0;
+}
+
+/*
+ * RShim read/write methods for 32-bit systems
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct rshim_pcie *dev, int chan, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write trigger bits to perform read */
+	writel(RSH_BYTE_ACC_READ_TRIGGER, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read lower 32-bits of data */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read upper 32-bits of data */
+	read_value = readl(dev->rshim_regs +
+		(RSH_BYTE_ACC_RDAT | (chan << 16)));
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+static int rshim_byte_acc_write(struct rshim_pcie *dev, int chan, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	writel(addr, dev->rshim_regs + (RSH_BYTE_ACC_ADDR | (chan << 16)));
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	writel(RSH_BYTE_ACC_SIZE, dev->rshim_regs +
+		(RSH_BYTE_ACC_CTL | (chan << 16)));
+
+	/* Write lower 32 bits of data to TRIO_CR_GW_DATA */
+	writel((u32)(value >> 32), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(dev, chan);
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to TRIO_CR_GW_DATA */
+	writel((u32)(value), dev->rshim_regs +
+		(RSH_BYTE_ACC_WDAT | (chan << 16)));
+
+	return 0;
+}
+#endif /* CONFIG_64BIT */
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_read(dev, chan, addr, result);
+#else
+	*result = readq(dev->rshim_regs + (addr | (chan << 16)));
+#endif
+	return retval;
+}
+
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	u64 result;
+	int retval = 0;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim's BAR.
+	 * Instead, we must write no more than 15 8-byte words before
+	 * doing a read from another register within the BAR,
+	 * which forces previous writes to drain.
+	 */
+	if (dev->write_count == 15) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, chan, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+#ifndef CONFIG_64BIT
+	retval = rshim_byte_acc_write(dev, chan, addr, value);
+#else
+	writeq(value, dev->rshim_regs + (addr | (chan << 16)));
+#endif
+
+	return retval;
+}
+
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/* Probe routine */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev;
+	struct rshim_backend *bd;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL) {
+		err = -ENOMEM;
+		goto error;
+	}
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->dev_name = pcie_dev_name;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		bd->destroy = rshim_pcie_delete;
+		bd->owner = THIS_MODULE;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		dev_err(&pci_dev->dev, "Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf) {
+		bd->read_buf = kzalloc(READ_BUF_SIZE,
+					   GFP_KERNEL);
+	}
+	allocfail |= bd->read_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE,
+					    GFP_KERNEL);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	dev->bar0_size = pci_resource_len(pci_dev, 0);
+
+	/* Fail if the BAR is unassigned. */
+	if (!dev->bar0_size) {
+		pr_err("BAR unassigned, run 'lspci -v'.\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Map in the RShim registers. */
+	dev->rshim_regs = ioremap(pci_resource_start(pci_dev, 0) +
+				  PCI_RSHIM_WINDOW_OFFSET,
+				  PCI_RSHIM_WINDOW_SIZE);
+	if (dev->rshim_regs == NULL) {
+		dev_err(&pci_dev->dev, "Failed to map RShim registers\n");
+		err = -ENOMEM;
+		goto rshim_map_failed;
+	}
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			rshim_unlock();
+			goto rshim_map_failed;
+		} else
+			pcie_dev_name = NULL;
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval)
+		goto rshim_map_failed;
+
+	return 0;
+
+ rshim_map_failed:
+	pci_disable_device(pci_dev);
+ enable_failed:
+	rshim_lock();
+	kref_put(&bd->kref, rshim_pcie_delete);
+	rshim_unlock();
+ error:
+	kfree(pcie_dev_name);
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int flush_wq;
+
+	if (!dev)
+		return;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+		RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+
+	/* Clear the flags before unmapping rshim registers to avoid race. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+	/* Add memory barrier to synchronize the order. */
+	mb();
+
+	if (dev->rshim_regs)
+		iounmap(dev->rshim_regs);
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.6");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish backend driver
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (17 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
@ 2019-01-03 19:17 ` Liming Sun
  2019-01-21 19:17 ` [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver Liming Sun
  19 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This commit adds the PCIe live-fish backend driver to access the
Rshim interface on the BlueField SoC, such as on the Smart NIC.
It is slow access and can be used for live-fish mode when the NIC
firmware hasn't been programmed yet.

Reviewed-by: David Woods <dwoods@mellanox.com>
Signed-off-by: Liming Sun <lsun@mellanox.com>
---
 drivers/soc/mellanox/host/Makefile        |   2 +-
 drivers/soc/mellanox/host/rshim_pcie_lf.c | 695 ++++++++++++++++++++++++++++++
 2 files changed, 696 insertions(+), 1 deletion(-)
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c

diff --git a/drivers/soc/mellanox/host/Makefile b/drivers/soc/mellanox/host/Makefile
index fa4b21c..79a1c86 100644
--- a/drivers/soc/mellanox/host/Makefile
+++ b/drivers/soc/mellanox/host/Makefile
@@ -1,2 +1,2 @@
-obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o
+obj-m := rshim.o rshim_net.o rshim_usb.o rshim_pcie.o rshim_pcie_lf.o
 
diff --git a/drivers/soc/mellanox/host/rshim_pcie_lf.c b/drivers/soc/mellanox/host/rshim_pcie_lf.c
new file mode 100644
index 0000000..08e2c15
--- /dev/null
+++ b/drivers/soc/mellanox/host/rshim_pcie_lf.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rshim_pcie_lf.c - Mellanox RShim PCIe Livefish driver for x86 host
+ *
+ * Copyright 2017 Mellanox Technologies. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/pci.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+
+#include "rshim.h"
+
+/* Disable RSim access. */
+static int rshim_disable;
+module_param(rshim_disable, int, 0444);
+MODULE_PARM_DESC(rshim_disable, "Disable rshim (obsoleted)");
+
+/** Our Vendor/Device IDs. */
+#define TILERA_VENDOR_ID					0x15b3
+#define BLUEFIELD_DEVICE_ID					0x0211
+
+/* Maximum number of devices this driver can handle */
+#define MAX_DEV_COUNT						16
+
+/* Mellanox Address & Data Capabilities */
+#define MELLANOX_ADDR						0x58
+#define MELLANOX_DATA						0x5c
+#define MELLANOX_CAP_READ					0x1
+
+/* TRIO_CR_GATEWAY registers */
+#define TRIO_CR_GW_LOCK						0xe38a0
+#define TRIO_CR_GW_LOCK_CPY					0xe38a4
+#define TRIO_CR_GW_DATA_UPPER					0xe38ac
+#define TRIO_CR_GW_DATA_LOWER					0xe38b0
+#define TRIO_CR_GW_CTL						0xe38b4
+#define TRIO_CR_GW_ADDR_UPPER					0xe38b8
+#define TRIO_CR_GW_ADDR_LOWER					0xe38bc
+#define TRIO_CR_GW_LOCK_ACQUIRED				0x80000000
+#define TRIO_CR_GW_LOCK_RELEASE					0x0
+#define TRIO_CR_GW_BUSY						0x60000000
+#define TRIO_CR_GW_TRIGGER					0xe0000000
+#define TRIO_CR_GW_READ_4BYTE					0x6
+#define TRIO_CR_GW_WRITE_4BYTE					0x2
+
+/* Base RShim Address */
+#define RSH_BASE_ADDR						0x80000000
+#define RSH_CHANNEL1_BASE					0x80010000
+
+struct rshim_pcie {
+	/* RShim backend structure. */
+	struct rshim_backend	bd;
+
+	struct pci_dev *pci_dev;
+
+	/* Keep track of number of 8-byte word writes */
+	u8 write_count;
+};
+
+static struct rshim_pcie *instances[MAX_DEV_COUNT];
+
+/* Mechanism to access the CR space using hidden PCI capabilities */
+static int pci_cap_read(struct pci_dev *pci_dev, int offset,
+				u32 *result)
+{
+	int retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Set LSB to indicate a read operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset | MELLANOX_CAP_READ);
+	if (retval)
+		return retval;
+
+	/* Read result from MELLANOX_DATA */
+	retval = pci_read_config_dword(pci_dev, MELLANOX_DATA,
+				result);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int pci_cap_write(struct pci_dev *pci_dev, int offset,
+				u32 value)
+{
+	int retval;
+
+	/* Write data to MELLANOX_DATA */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_DATA,
+				value);
+	if (retval)
+		return retval;
+
+	/*
+	 * Write target offset to MELLANOX_ADDR.
+	 * Leave LSB clear to indicate a write operation.
+	 */
+	retval = pci_write_config_dword(pci_dev, MELLANOX_ADDR,
+				offset);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Acquire and release the TRIO_CR_GW_LOCK. */
+static int trio_cr_gw_lock_acquire(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	/* Wait until TRIO_CR_GW_LOCK is free */
+	do {
+		retval = pci_cap_read(pci_dev, TRIO_CR_GW_LOCK,
+				&read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & TRIO_CR_GW_LOCK_ACQUIRED);
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_ACQUIRED);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int trio_cr_gw_lock_release(struct pci_dev *pci_dev)
+{
+	int retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_LOCK_RELEASE);
+
+	return retval;
+}
+
+/*
+ * Mechanism to access the RShim from the CR space using the
+ * TRIO_CR_GATEWAY.
+ */
+static int trio_cr_gw_read(struct pci_dev *pci_dev, int addr,
+				u32 *result)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_READ_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_READ_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger TRIO_CR_GW to read from addr */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Read 32-bit data from TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_read(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				result);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+static int trio_cr_gw_write(struct pci_dev *pci_dev, int addr,
+				u32 value)
+{
+	int retval;
+
+	/* Acquire TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_acquire(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write 32-bit data to TRIO_CR_GW_DATA_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_DATA_LOWER,
+				value);
+	if (retval)
+		return retval;
+
+	/* Write addr to TRIO_CR_GW_ADDR_LOWER */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_ADDR_LOWER,
+				addr);
+	if (retval)
+		return retval;
+
+	/* Set TRIO_CR_GW_WRITE_4BYTE */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_CTL,
+				TRIO_CR_GW_WRITE_4BYTE);
+	if (retval)
+		return retval;
+
+	/* Trigger CR gateway to write to RShim */
+	retval = pci_cap_write(pci_dev, TRIO_CR_GW_LOCK,
+				TRIO_CR_GW_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Release TRIO_CR_GW_LOCK */
+	retval = trio_cr_gw_lock_release(pci_dev);
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* Wait until the RSH_BYTE_ACC_CTL pending bit is cleared */
+static int rshim_byte_acc_pending_wait(struct pci_dev *pci_dev)
+{
+	int retval;
+	u32 read_value;
+
+	do {
+		retval = trio_cr_gw_read(pci_dev,
+			RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL, &read_value);
+		if (retval)
+			return retval;
+		if (signal_pending(current))
+			return -EINTR;
+	} while (read_value & (RSH_CHANNEL1_BASE + RSH_BYTE_ACC_PENDING));
+
+	return 0;
+}
+
+/*
+ * Mechanism to do an 8-byte access to the Rshim using
+ * two 4-byte accesses through the Rshim Byte Access Widget.
+ */
+static int rshim_byte_acc_read(struct pci_dev *pci_dev, int addr,
+				u64 *result)
+{
+	int retval;
+	u32 read_value;
+	u64 read_result;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write trigger bits to perform read */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_READ_TRIGGER);
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read lower 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result = (u64)read_value << 32;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Read RSH_BYTE_ACC_RDAT to read upper 32-bits of data */
+	retval = trio_cr_gw_read(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_RDAT,
+				&read_value);
+	if (retval)
+		return retval;
+
+	read_result |= (u64)read_value;
+	*result = be64_to_cpu(read_result);
+
+	return 0;
+}
+
+static int rshim_byte_acc_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write target address to RSH_BYTE_ACC_ADDR */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_ADDR, addr);
+	if (retval)
+		return retval;
+
+	/* Write control bits to RSH_BYTE_ACC_CTL */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE + RSH_BYTE_ACC_CTL,
+				RSH_BYTE_ACC_SIZE);
+	if (retval)
+		return retval;
+
+	/* Write lower 32 bits of data to TRIO_CR_GW_DATA */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Wait for RSH_BYTE_ACC_CTL pending bit to be cleared */
+	retval = rshim_byte_acc_pending_wait(pci_dev);
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to TRIO_CR_GW_DATA */
+	retval = trio_cr_gw_write(pci_dev, RSH_CHANNEL1_BASE +
+				  RSH_BYTE_ACC_WDAT, (u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * The RShim Boot FIFO has a holding register which can couple
+ * two consecutive 4-byte writes into a single 8-byte write
+ * before pushing the data into the FIFO.
+ * Hence the RShim Byte Access Widget is not necessary to write
+ * to the BOOT FIFO using 4-byte writes.
+ */
+static int rshim_boot_fifo_write(struct pci_dev *pci_dev, int addr,
+				u64 value)
+{
+	int retval;
+
+	/* Write lower 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value >> 32));
+	if (retval)
+		return retval;
+
+	/* Write upper 32 bits of data to RSH_BOOT_FIFO_DATA */
+	retval = trio_cr_gw_write(pci_dev, addr,
+				(u32)(value));
+	if (retval)
+		return retval;
+
+	return 0;
+}
+
+/* RShim read/write routines */
+static int rshim_pcie_read(struct rshim_backend *bd, int chan, int addr,
+				u64 *result)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	dev->write_count = 0;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	addr = be32_to_cpu(addr);
+
+	retval = rshim_byte_acc_read(pci_dev, addr, result);
+
+	return retval;
+}
+
+static int rshim_pcie_write(struct rshim_backend *bd, int chan, int addr,
+				u64 value)
+{
+	struct rshim_pcie *dev = container_of(bd, struct rshim_pcie, bd);
+	struct pci_dev *pci_dev = dev->pci_dev;
+	int retval;
+	u64 result;
+	bool is_boot_stream = (addr == RSH_BOOT_FIFO_DATA);
+
+	if (!bd->has_rshim)
+		return -ENODEV;
+
+	addr = RSH_BASE_ADDR + (addr | (chan << 16));
+	if (!is_boot_stream)
+		addr = be32_to_cpu(addr);
+
+	value = be64_to_cpu(value);
+
+	/*
+	 * We cannot stream large numbers of PCIe writes to the RShim.
+	 * Instead, we must write no more than 15 words before
+	 * doing a read from another register within the RShim,
+	 * which forces previous writes to drain.
+	 * Note that we allow a max write_count of 7 since each 8-byte
+	 * write is done using 2 4-byte writes in the boot fifo case.
+	 */
+	if (dev->write_count == 7) {
+		/* Add memory barrier to synchronize the order. */
+		mb();
+		rshim_pcie_read(bd, 1, RSH_SCRATCHPAD, &result);
+	}
+	dev->write_count++;
+
+	if (is_boot_stream)
+		retval = rshim_boot_fifo_write(pci_dev, addr, value);
+	else
+		retval = rshim_byte_acc_write(pci_dev, addr, value);
+
+	return retval;
+}
+
+static void rshim_pcie_delete(struct kref *kref)
+{
+	struct rshim_backend *bd;
+	struct rshim_pcie *dev;
+
+	bd = container_of(kref, struct rshim_backend, kref);
+	dev = container_of(bd, struct rshim_pcie, bd);
+
+	rshim_deregister(bd);
+	if (dev->pci_dev)
+		dev_set_drvdata(&dev->pci_dev->dev, NULL);
+	kfree(dev);
+}
+
+/* Probe routine */
+static int rshim_pcie_probe(struct pci_dev *pci_dev,
+				const struct pci_device_id *id)
+{
+	struct rshim_pcie *dev = NULL;
+	struct rshim_backend *bd = NULL;
+	char *pcie_dev_name;
+	int index, retval, err = 0, allocfail = 0;
+	const int max_name_len = 20;
+
+	for (index = 0; index < MAX_DEV_COUNT; index++)
+		if (instances[index] == NULL)
+			break;
+	if (index == MAX_DEV_COUNT) {
+		pr_err("Driver cannot handle any more devices.\n");
+		return -ENODEV;
+	}
+
+	pcie_dev_name = kzalloc(max_name_len, GFP_KERNEL);
+	if (pcie_dev_name == NULL)
+		return -ENOMEM;
+	retval = snprintf(pcie_dev_name, max_name_len,
+				"rshim_pcie%d", index);
+	if (WARN_ON_ONCE(retval >= max_name_len)) {
+		err = -EINVAL;
+		goto error;
+	}
+
+	pr_debug("Probing %s\n", pcie_dev_name);
+
+	rshim_lock();
+
+	/* Find the backend. */
+	bd = rshim_find(pcie_dev_name);
+	if (bd) {
+		kref_get(&bd->kref);
+		dev = container_of(bd, struct rshim_pcie, bd);
+	} else {
+		/* Get some memory for this device's driver state. */
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (dev == NULL) {
+			err = -ENOMEM;
+			rshim_unlock();
+			goto error;
+		}
+
+		instances[index] = dev;
+		bd = &dev->bd;
+		bd->has_rshim = 1;
+		bd->has_tm = 1;
+		bd->owner = THIS_MODULE;
+		bd->dev_name = pcie_dev_name;
+		bd->destroy = rshim_pcie_delete;
+		bd->read_rshim = rshim_pcie_read;
+		bd->write_rshim = rshim_pcie_write;
+		dev->write_count = 0;
+		mutex_init(&bd->mutex);
+	}
+
+	retval = rshim_fifo_alloc(bd);
+	if (retval) {
+		rshim_unlock();
+		pr_err("Failed to allocate fifo\n");
+		err = -ENOMEM;
+		goto enable_failed;
+	}
+
+	allocfail |= rshim_fifo_alloc(bd);
+
+	if (!bd->read_buf) {
+		bd->read_buf = kzalloc(READ_BUF_SIZE,
+					   GFP_KERNEL);
+	}
+	allocfail |= bd->read_buf == 0;
+
+	if (!bd->write_buf) {
+		bd->write_buf = kzalloc(WRITE_BUF_SIZE,
+					    GFP_KERNEL);
+	}
+	allocfail |= bd->write_buf == 0;
+
+	if (allocfail) {
+		rshim_unlock();
+		pr_err("can't allocate buffers\n");
+		goto enable_failed;
+	}
+
+	rshim_unlock();
+
+	/* Enable the device. */
+	err = pci_enable_device(pci_dev);
+	if (err != 0) {
+		pr_err("Device enable failed with error %d\n", err);
+		goto enable_failed;
+	}
+
+	/* Initialize object */
+	dev->pci_dev = pci_dev;
+	dev_set_drvdata(&pci_dev->dev, dev);
+
+	/* Enable PCI bus mastering. */
+	pci_set_master(pci_dev);
+
+	/*
+	 * Register rshim here since it needs to detect whether other backend
+	 * has already registered or not, which involves reading/writing rshim
+	 * registers and has assumption that the under layer is working.
+	 */
+	rshim_lock();
+	if (!bd->registered) {
+		retval = rshim_register(bd);
+		if (retval) {
+			pr_err("Backend register failed with error %d\n",
+				 retval);
+			rshim_unlock();
+			goto register_failed;
+		}
+	}
+	rshim_unlock();
+
+	/* Notify that the device is attached */
+	mutex_lock(&bd->mutex);
+	retval = rshim_notify(bd, RSH_EVENT_ATTACH, 0);
+	mutex_unlock(&bd->mutex);
+	if (retval)
+		goto register_failed;
+
+	return 0;
+
+register_failed:
+	pci_disable_device(pci_dev);
+
+enable_failed:
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+error:
+	kfree(pcie_dev_name);
+
+	return err;
+}
+
+/* Called via pci_unregister_driver() when the module is removed. */
+static void rshim_pcie_remove(struct pci_dev *pci_dev)
+{
+	struct rshim_pcie *dev = dev_get_drvdata(&pci_dev->dev);
+	int retval, flush_wq;
+
+	/*
+	 * Reset TRIO_PCIE_INTFC_RX_BAR0_ADDR_MASK and TRIO_MAP_RSH_BASE.
+	 * Otherwise, upon host reboot, the two registers will retain previous
+	 * values that don't match the new BAR0 address that is assigned to
+	 * the PCIe ports, causing host MMIO access to RShim to fail.
+	 */
+	retval = rshim_pcie_write(&dev->bd, (RSH_SWINT >> 16) & 0xF,
+			RSH_SWINT & 0xFFFF, RSH_INT_VEC0_RTC__SWINT3_MASK);
+	if (retval)
+		pr_err("RShim write failed\n");
+
+	/* Clear the flags before deleting the backend. */
+	dev->bd.has_rshim = 0;
+	dev->bd.has_tm = 0;
+
+	rshim_notify(&dev->bd, RSH_EVENT_DETACH, 0);
+	mutex_lock(&dev->bd.mutex);
+	flush_wq = !cancel_delayed_work(&dev->bd.work);
+	if (flush_wq)
+		flush_workqueue(rshim_wq);
+	dev->bd.has_cons_work = 0;
+	kfree(dev->bd.read_buf);
+	kfree(dev->bd.write_buf);
+	rshim_fifo_free(&dev->bd);
+	mutex_unlock(&dev->bd.mutex);
+
+	rshim_lock();
+	kref_put(&dev->bd.kref, rshim_pcie_delete);
+	rshim_unlock();
+
+	pci_disable_device(pci_dev);
+	dev_set_drvdata(&pci_dev->dev, NULL);
+}
+
+static struct pci_device_id rshim_pcie_table[] = {
+	{ PCI_DEVICE(TILERA_VENDOR_ID, BLUEFIELD_DEVICE_ID), },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, rshim_pcie_table);
+
+static struct pci_driver rshim_pcie_driver = {
+	.name = "rshim_pcie_lf",
+	.probe = rshim_pcie_probe,
+	.remove = rshim_pcie_remove,
+	.id_table = rshim_pcie_table,
+};
+
+static int __init rshim_pcie_init(void)
+{
+	int result;
+
+	/* Register the driver */
+	result = pci_register_driver(&rshim_pcie_driver);
+	if (result)
+		pr_err("pci_register failed, error number %d\n", result);
+
+	return result;
+}
+
+static void __exit rshim_pcie_exit(void)
+{
+	/* Unregister the driver. */
+	pci_unregister_driver(&rshim_pcie_driver);
+}
+
+module_init(rshim_pcie_init);
+module_exit(rshim_pcie_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mellanox Technologies");
+MODULE_VERSION("0.4");
-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 70+ messages in thread

* RE: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2018-12-12 23:07   ` Matthias Brugger
@ 2019-01-03 19:20     ` Liming Sun
  0 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-03 19:20 UTC (permalink / raw)
  To: Matthias Brugger, y, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, mbrugger@suse.com >> Matthias Brugger,
	linux-arm-kernel

Thanks Matthias for the comments and sorry for the holiday delay.
The recommended changes have been posted in patch v7 1/9.

Thanks,
Liming

> -----Original Message-----
> From: Matthias Brugger <matthias.bgg@gmail.com>
> Sent: Wednesday, December 12, 2018 6:08 PM
> To: Liming Sun <lsun@mellanox.com>; y@bu-lab53.mtbu.labs.mlnx; Olof Johansson <olof@lixom.net>; Arnd Bergmann <arnd@arndb.de>;
> David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc <arm@kernel.org>
> Cc: devicetree@vger.kernel.org; linux-arm-kernel@lists.infradead.org; mbrugger@suse.com >> Matthias Brugger <mbrugger@suse.com>
> Subject: Re: [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
> 
> 
> 
> On 01/11/2018 17:23, Liming Sun wrote:> This commit adds the TmFifo driver for
> Mellanox BlueField Soc.
> > TmFifo is a shared FIFO which enables external host machine to
> > exchange data with the SoC via USB or PCIe. The driver is based on
> > virtio framework and has console and network access enabled.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> > ---
> >  drivers/soc/Kconfig                |    1 +
> >  drivers/soc/Makefile               |    1 +
> >  drivers/soc/mellanox/Kconfig       |   18 +
> >  drivers/soc/mellanox/Makefile      |    5 +
> >  drivers/soc/mellanox/tmfifo.c      | 1236 ++++++++++++++++++++++++++++++++++++
> >  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
> >  6 files changed, 1337 insertions(+)
> >  create mode 100644 drivers/soc/mellanox/Kconfig
> >  create mode 100644 drivers/soc/mellanox/Makefile
> >  create mode 100644 drivers/soc/mellanox/tmfifo.c
> >  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
> >
> 
> [...]
> 
> > +
> > +/* Interrupt handler. */
> > +static irqreturn_t tmfifo_irq_handler(int irq, void *dev_id)
> > +{
> > +	int i = (uintptr_t)dev_id % sizeof(void *);
> > +	struct tmfifo *fifo = dev_id - i;
> > +
> > +	if (i < TM_IRQ_CNT && !test_and_set_bit(i, &fifo->pend_events))
> > +		schedule_work(&fifo->work);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> [...]
> > +
> > +/* Probe the TMFIFO. */
> > +static int tmfifo_probe(struct platform_device *pdev)
> > +{
> > +	u64 ctl;
> > +	struct tmfifo *fifo;
> > +	struct resource *rx_res, *tx_res;
> > +	struct virtio_net_config net_config;
> > +	int i, ret;
> > +
> > +	/* Get the resource of the Rx & Tx FIFO. */
> > +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> > +	if (!rx_res || !tx_res) {
> > +		ret = -EINVAL;
> > +		goto err;
> > +	}
> > +
> > +	if (request_mem_region(rx_res->start,
> > +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> > +		ret = -EBUSY;
> > +		goto early_err;
> > +	}
> > +
> > +	if (request_mem_region(tx_res->start,
> > +			       resource_size(tx_res), "bf-tmfifo") == NULL) {
> > +		release_mem_region(rx_res->start, resource_size(rx_res));
> > +		ret = -EBUSY;
> > +		goto early_err;
> > +	}
> > +
> > +	ret = -ENOMEM;
> > +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> > +	if (!fifo)
> > +		goto err;
> > +
> > +	fifo->pdev = pdev;
> > +	platform_set_drvdata(pdev, fifo);
> > +
> > +	spin_lock_init(&fifo->spin_lock);
> > +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> > +
> > +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> > +	fifo->timer.function = tmfifo_timer;
> > +
> > +	for (i = 0; i < TM_IRQ_CNT; i++) {
> > +		fifo->irq[i] = platform_get_irq(pdev, i);
> > +		ret = request_irq(fifo->irq[i], tmfifo_irq_handler, 0,
> > +				  "tmfifo", (u8 *)fifo + i);
> 
> I think it would be better if you create a struct that passes a pointer to fifo
> and the ID instead of "hiding" the ID inside the address.
> 
> Regards,
> Matthias
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2018-11-01 16:25   ` [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
@ 2019-01-18 16:02     ` Arnd Bergmann
  2019-01-21 19:22       ` Liming Sun
  2019-01-22 12:20       ` Vincent Whitchurch
  0 siblings, 2 replies; 70+ messages in thread
From: Arnd Bergmann @ 2019-01-18 16:02 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, linux-pci, Vincent Whitchurch, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM

On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>
> An external host can connect to a Mellanox BlueField SoC via an
> interface called Rshim. The Rshim driver provides boot, console,
> and networking services over this interface. This commit is
> the common driver where the other backend (transport) driver will
> use.
>
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>

Hi Liming,

I've taken a new look at your patch series for drivers/soc/ now,
thanks for your continued submissions.

This is again just a set of very high-level comments, but I think we
should resolve some of the fundamental questions first.
Incidentally, Vincent Whitchurch has recently posted another
patch series with a very similar intention, but for other hardware
and taking a different approach.

In both cases, the idea is to use virtio based drivers to provide
services from a host machine into another Linux instance running
on an embedded system behind a PCIe slot or similar. Your
Bluefield SoC patches are well written, but are intentionally
kept specific to a particular use case and tied to one piece
of hardware. In contrast, Vincent uses the existing code from
drivers/misc/mic/vop/ that is equally hardware specific, but he
extends it to be applicable to other hardware as well.

It would be good if you could look at each other's approaches
to see where we could take it from here. I think ideally we
should have a common driver framework for doing the same
thing across both of your devices and as well as others.

That would also resolve my main concern about the drivers,
which is the placement in drivers/soc/ for a set of drivers
that are unlike most drivers in that directory not mean for
running on the SoC itself in order drive unusual functionality
on the SoC, but are (at least partially) meant for running on
a host machine to communicate with that SoC over PCIe
or USB.

As an example, your network driver should really be placed
in drivers/net/, though it is unclear to me how it relates
to the existing virtio_net driver. In the case of mic/vop,
the idea is to use virtio_net on the device side, but have
vhost_net or a user space implementation on the host side,
but that is apparently not what you do here. Can you
explain why?

Another high-level question I have is on how your various
drivers relate to one another. This should normally be
explained in the 0/9 email, but I don't seem to have received
such a mail. I see that you have multiple back-end drivers
for the underlying transport, with one of them based on USB.
Have you come up with a way to use the same high-level
driver such as the network link over this USB back-end,
or is this for something else?

      Arnd

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver
  2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
                   ` (18 preceding siblings ...)
  2019-01-03 19:17 ` [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
@ 2019-01-21 19:17 ` Liming Sun
  2019-02-18 13:24   ` Arnd Bergmann
  19 siblings, 1 reply; 70+ messages in thread
From: Liming Sun @ 2019-01-21 19:17 UTC (permalink / raw)
  To: Olof Johansson, Arnd Bergmann, David Woods, Robin Murphy, arm-soc
  Cc: devicetree, Liming Sun, linux-arm-kernel

This patch series implements Linux driver support for the Rshim
component on Mellanox BlueField ARM SoC.

1. Overview of the Rshim component

The Rshim component provides a connection among different other
components of the SoC and can be accessed externally via USB or
PCIe. USB access is available on standard appliance and SmartNIC.
PCIe access is for SmartNIC only.

From user perspective, Rshim provides the following
functionalities.

1.1 Boot FIFO

  The Boot FIFO is polled by the boot ROM. At a later booting
  phase it's read by UEFI as well to load the Linux kernel.

  The Boot FIFO is a one-way direction FIFO. The data could
  come from different sources.

    a) On-chip eMMC, which is the default configuration.

      After chip reset, HW will generate eMMC commands, which
      triggers the boot stream on the eMMC boot partition to show
      up in Boot FIFO.

      This one doesn't need any Linux driver.

    b) Injected externally from USB or PCIe.

      External host could overwrite the boot mode and inject
      boot stream into the Boot FIFO over USB or PCIe. It's
      used in board bring-up, fault recovering or debugging.

      This one usually needs driver support in the external host.

1.2 TmFifo

  TmFifo are FIFOs which can be used for communication between
  the SoC ARM core and the external side. Each direction has its
  own FIFO.

  TmFifo needs driver support on both the SoC side and the
  external host side.

1.3 Rshim register space access

  Rshim register space could be accessed externally via
  USB or PCIe. It's used for configuration, debugging, etc.
  More specifically, it could be used to access the ARM
  CoreSight component which could do on-chip debugging,
  such as for boot loader, drivers, or crash information
  collection.

  This one needs driver support from external host.

2. Driver Introduction

  This patch series provides several drivers.

  *) Patch 1/9 ~ 4/9 (tmfifo.c, tmfifo_regs.h, Kconfig, Makefile)

    They provide Linux driver support for the TmFifo on the
    SoC side (see above 1.2).

  *) Patch 5/9 ~ 9/9

    They provide Linux driver support for the external host.

    - Patch 5/9 (rshim.c, rshim.h, rshim_regs.h, Makefile)

      It provides common functionalities and register access
      to the RShim component. Several character devices are
      created like below for easy user-access.

      /dev/rshim<N>/boot 

        This device file is used to inject boot stream into
        the Boot FIFO, such as
              'cat boot-image > /dev/rshim<N>/boot'
        This one is to implement the above function 1.1.b

      /dev/rshim<N>/rshim

        This device file is used to access Rshim register
        space like a plat file.

      /dev/rshim<N>/console

        This device file is used to access the virtual
        console to the SoC via the TmFifo (see above
        functionality 1.2). It can be used by any standard tools
        like minicom, screen, etc.

    - Patch 6/9 (rshim_net.c)

      This patch provides virtual networking support over
      the Rshim TmFifo (see above functionality 1.2).

    - Patch 7/9 (rshim_usb.c)

      This is the USB backend driver, which implements the
      details to access the RShim component via USB. It supports
      endpoints, USB bulk-transfer and interrupts. After loaded
      by ACPI, it triggers the loading of the common driver
      and network driver, and provides low-level APIs to the
      common driver.

      Below is an example of the networking calling stack.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim USB backend driver

    - Patch 8/9 (rshim_pcie.c)

      This is the PCIe backend driver, which implements the
      details to access the RShim component via PCIe when
      the HCA firmware is ready. In such case, the Rshim component
      is exposed as a standard PCIe PF and it's register space
      can be mapped in BAR0.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim PCIe backend driver

   - Patch 9/9 (rshim_pcie_lf.c)

     Similarly, this is another PCIe backend driver, but provides
     slow access when the HCA firmware is not ready. In such as,
     the BAR0 mapping is not available. The Rshim register access
     is done via a slow access gateway. The below calling stack
     is also similar.

      Linux Network Stack --> virtio_net --> rshim_net -->
      rshim common driver --> rshim PCIe livefish backend driver

Liming Sun (9):
  soc: Add TmFifo driver for Mellanox BlueField Soc
  arm64: Add Mellanox BlueField SoC config option
  dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  MAINTAINERS: Add entry for Mellanox Bluefield Soc
  soc: mellanox: host: Add the common host side Rshim driver
  soc: mellanox: host: Add networking support over Rshim
  soc: mellanox: host: Add the Rshim USB backend driver
  soc: mellanox: host: Add the Rshim PCIe backend driver
  soc: mellanox: host: Add the Rshim PCIe live-fish backend driver

 .../devicetree/bindings/soc/mellanox/tmfifo.txt    |   23 +
 MAINTAINERS                                        |    8 +
 arch/arm64/Kconfig.platforms                       |    6 +
 arch/arm64/configs/defconfig                       |    1 +
 drivers/soc/Kconfig                                |    1 +
 drivers/soc/Makefile                               |    1 +
 drivers/soc/mellanox/Kconfig                       |   26 +
 drivers/soc/mellanox/Makefile                      |    6 +
 drivers/soc/mellanox/host/Makefile                 |    2 +
 drivers/soc/mellanox/host/rshim.c                  | 2673 ++++++++++++++++++++
 drivers/soc/mellanox/host/rshim.h                  |  361 +++
 drivers/soc/mellanox/host/rshim_net.c              |  834 ++++++
 drivers/soc/mellanox/host/rshim_pcie.c             |  478 ++++
 drivers/soc/mellanox/host/rshim_pcie_lf.c          |  695 +++++
 drivers/soc/mellanox/host/rshim_regs.h             |  152 ++
 drivers/soc/mellanox/host/rshim_usb.c              | 1035 ++++++++
 drivers/soc/mellanox/tmfifo.c                      | 1244 +++++++++
 drivers/soc/mellanox/tmfifo_regs.h                 |   76 +
 18 files changed, 7622 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
 create mode 100644 drivers/soc/mellanox/Kconfig
 create mode 100644 drivers/soc/mellanox/Makefile
 create mode 100644 drivers/soc/mellanox/host/Makefile
 create mode 100644 drivers/soc/mellanox/host/rshim.c
 create mode 100644 drivers/soc/mellanox/host/rshim.h
 create mode 100644 drivers/soc/mellanox/host/rshim_net.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie.c
 create mode 100644 drivers/soc/mellanox/host/rshim_pcie_lf.c
 create mode 100644 drivers/soc/mellanox/host/rshim_regs.h
 create mode 100644 drivers/soc/mellanox/host/rshim_usb.c
 create mode 100644 drivers/soc/mellanox/tmfifo.c
 create mode 100644 drivers/soc/mellanox/tmfifo_regs.h

-- 
1.8.3.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* RE: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2019-01-18 16:02     ` Arnd Bergmann
@ 2019-01-21 19:22       ` Liming Sun
  2019-01-22 12:20       ` Vincent Whitchurch
  1 sibling, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-21 19:22 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, Vincent Whitchurch, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM

Thanks Arnd for the comments. The 0/9 email was sent out just now to
add more details about the design and changes. Please also see my response
below.

- Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, January 18, 2019 11:02 AM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>; Vincent Whitchurch
> <vincent.whitchurch@axis.com>; linux-pci <linux-pci@vger.kernel.org>; linux-ntb@googlegroups.com; Christoph Hellwig <hch@lst.de>
> Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
> 
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices and as well as others.

Yes, I checked drivers/misc/mic/vop and Vincent Whitchurch's patches 
(Virtio-over-PCIe on non-MIC) and related comments. I kind of feel 
that besides the common virtio infrastructure, there seems not much
to be reused in the rest of implementation yet, though they are trying
to do the similar things.  (Feel free to correct me if I misunderstood it.)

I just submitted the patch 0/9 to explain some details of the rshim
component and the driver patches. Could you help take a look?

The rshim driver of BlueField SoC has a few more functionalities 
which are very HW-specific. Some needs driver support from both 
ARM target and the external host, some only needs external host 
driver support.

As for common framework, we used to implement the drivers based on
the remote proc (Documentation/remoteproc.txt), which seems more
close to what we wanted (in my humble opinion). Later due to more 
functionalities to add and the lack of remote proc in old kernels, we 
changed to use virtio framework directly, which seems very helpful and
saved quite some driver work.

> 
> That would also resolve my main concern about the drivers,
> which is the placement in drivers/soc/ for a set of drivers
> that are unlike most drivers in that directory not mean for
> running on the SoC itself in order drive unusual functionality
> on the SoC, but are (at least partially) meant for running on
> a host machine to communicate with that SoC over PCIe
> or USB.
> 
> As an example, your network driver should really be placed
> in drivers/net/, though it is unclear to me how it relates
> to the existing virtio_net driver. In the case of mic/vop,
> the idea is to use virtio_net on the device side, but have
> vhost_net or a user space implementation on the host side,
> but that is apparently not what you do here. Can you
> explain why?

Yes, I actually have the same concerns where the host side
drivers should go.  For now ther're just added for code review
purpose. drivers/soc/ seems not a good place. One thought
is to move the rshim_net, rshim_pcie and rshim_pcie_lf backend
driver to drivers/net/ethernet/Mellanox/rshim/ and move the
rshim common driver to drivers/char as it creates the character
devices?

The device side of this patch uses the virtio_net driver as well. 

The host side is not just for networking, which was mentioned 
in the 0/9 patch. The host side driver manages the whole rshim
component and is called the 'rshim' driver. It includes driver
to access the TmFifo, where virtio_net is used to provide 
networking support. It needs to talk to the common
driver then the USB or PCIe backend driver.  It seems to me that
vhost_net doesn't quite fit this model and might make it 
over-complicated.

> 
> Another high-level question I have is on how your various
> drivers relate to one another. This should normally be
> explained in the 0/9 email, but I don't seem to have received
> such a mail. I see that you have multiple back-end drivers
> for the underlying transport, with one of them based on USB.
> Have you come up with a way to use the same high-level
> driver such as the network link over this USB back-end,
> or is this for something else?

Yes, 0/9 has been sent. Sorry I should have provided since beginning.

The USB (or PCIe) provide the general transport to access the RShim
component, for networking, console, register access, boot service,
etc. So it's not just for network link. The implementation seems very
HW specific, such as providing APIs like rshim_usb_read_rshim()
and rshim_usb_write_rshim(). In PCIe backend it has similar APIs
like rshim_pcie_read(), rshim_pcie_write().

Not very clear about what you meant the " the same high-level driver 
such as the network link over this USB back-end". Do you mean using
any existing network over USB framework or provide some mechanism
to be reused by other network over USB driver?

By the way, the 0/9 has been sent. Could you help take a look whether 
it clarifies a little bit or not?

> 
>       Arnd
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  2019-01-18 16:02     ` Arnd Bergmann
  2019-01-21 19:22       ` Liming Sun
@ 2019-01-22 12:20       ` Vincent Whitchurch
       [not found]         ` <DB6PR05MB32235BE3E5EBCA60ABDEDB13A1980@DB6PR05MB3223.eurprd05.prod.outlook.com>
  1 sibling, 1 reply; 70+ messages in thread
From: Vincent Whitchurch @ 2019-01-22 12:20 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, Liming Sun, virtualization,
	arm-soc, Olof Johansson, linux-ntb, Robin Murphy,
	Christoph Hellwig, Linux ARM

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
> >
> > An external host can connect to a Mellanox BlueField SoC via an
> > interface called Rshim. The Rshim driver provides boot, console,
> > and networking services over this interface. This commit is
> > the common driver where the other backend (transport) driver will
> > use.
> >
> > Reviewed-by: David Woods <dwoods@mellanox.com>
> > Signed-off-by: Liming Sun <lsun@mellanox.com>
> 
> Hi Liming,
> 
> I've taken a new look at your patch series for drivers/soc/ now,
> thanks for your continued submissions.
> 
> This is again just a set of very high-level comments, but I think we
> should resolve some of the fundamental questions first.
> Incidentally, Vincent Whitchurch has recently posted another
> patch series with a very similar intention, but for other hardware
> and taking a different approach.
> 
> In both cases, the idea is to use virtio based drivers to provide
> services from a host machine into another Linux instance running
> on an embedded system behind a PCIe slot or similar. Your
> Bluefield SoC patches are well written, but are intentionally
> kept specific to a particular use case and tied to one piece
> of hardware. In contrast, Vincent uses the existing code from
> drivers/misc/mic/vop/ that is equally hardware specific, but he
> extends it to be applicable to other hardware as well.
> 
> It would be good if you could look at each other's approaches
> to see where we could take it from here. I think ideally we
> should have a common driver framework for doing the same
> thing across both of your devices and as well as others.

As far as I can see the biggest difference is that Rshim appears to
support interfaces which do not have shared memory between the host and
the card, which means that it has to jump through a lot more hoops to
make virtio work.

For example, the card side seems to use normal virtio-net and
virto-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
also running on the card, appears to have to actually look inside the
virtqueues and shuffle the data over the TmFifo interface, and this
driver has hard-coded support for only network and console, since it
apparently needs to know the details of how the virtio drivers use their
virtqueues (see tmfifo_virtio_rxtx()).

And the host side appears to _also_ run the virtio-net driver and there
the drivers/soc/mellanox/host/rshim_net.c code instead has to look
inside the virtqueues and shuffle the data over the other side of the
TmFifo interface.

So to me this looks very different from a traditional virtio
driver/device setup (which is what mic/vop uses).  I may be missing
something, but I don't quite understand why it's even using virtio in
the first place.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
       [not found]         ` <DB6PR05MB32235BE3E5EBCA60ABDEDB13A1980@DB6PR05MB3223.eurprd05.prod.outlook.com>
@ 2019-01-22 13:36           ` Liming Sun
  0 siblings, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-22 13:36 UTC (permalink / raw)
  To: Vincent Whitchurch, Arnd Bergmann
  Cc: DTML, David Woods, linux-pci, virtualization, arm-soc,
	Olof Johansson, linux-ntb, Robin Murphy, Christoph Hellwig,
	Linux ARM



From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Sent: Tuesday, January 22, 2019 7:20 AM
To: Arnd Bergmann
Cc: Liming Sun; Olof Johansson; David Woods; Robin Murphy; arm-soc; DTML; Linux ARM; linux-pci; linux-ntb@googlegroups.com; Christoph Hellwig; virtualization@lists.linux-foundation.org
Subject: Re: [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver
  

On Fri, Jan 18, 2019 at 05:02:21PM +0100, Arnd Bergmann wrote:
>> On Thu, Nov 1, 2018 at 5:49 PM Liming Sun <lsun@mellanox.com> wrote:
>> >
>> > An external host can connect to a Mellanox BlueField SoC via an
>> > interface called Rshim. The Rshim driver provides boot, console,
>> > and networking services over this interface. This commit is
>> > the common driver where the other backend (transport) driver will
>> > use.
>> >
>> > Reviewed-by: David Woods <dwoods@mellanox.com>
>> > Signed-off-by: Liming Sun <lsun@mellanox.com>
>> 
>> Hi Liming,
>> 
>> I've taken a new look at your patch series for drivers/soc/ now,
>> thanks for your continued submissions.
>> 
>> This is again just a set of very high-level comments, but I think we
>> should resolve some of the fundamental questions first.
>> Incidentally, Vincent Whitchurch has recently posted another
>> patch series with a very similar intention, but for other hardware
>> and taking a different approach.
>> 
>> In both cases, the idea is to use virtio based drivers to provide
>> services from a host machine into another Linux instance running
>> on an embedded system behind a PCIe slot or similar. Your
>> Bluefield SoC patches are well written, but are intentionally
>> kept specific to a particular use case and tied to one piece
>> of hardware. In contrast, Vincent uses the existing code from
>> drivers/misc/mic/vop/ that is equally hardware specific, but he
>> extends it to be applicable to other hardware as well.
>> 
>> It would be good if you could look at each other's approaches
>> to see where we could take it from here. I think ideally we
>> should have a common driver framework for doing the same
>> thing across both of your devices and as well as others.

> As far as I can see the biggest difference is that Rshim appears to
> support interfaces which do not have shared memory between the host and
> the card, which means that it has to jump through a lot more hoops to
> make virtio work.

> For example, the card side seems to use normal virtio-net and
> virto-console drivers, but the drivers/soc/mellanox/tmfifo.c driver,
> also running on the card, appears to have to actually look inside the
> virtqueues and shuffle the data over the TmFifo interface, and this
> driver has hard-coded support for only network and console, since it
> apparently needs to know the details of how the virtio drivers use their
> virtqueues (see tmfifo_virtio_rxtx()).

> And the host side appears to _also_ run the virtio-net driver and there
> the drivers/soc/mellanox/host/rshim_net.c code instead has to look
> inside the virtqueues and shuffle the data over the other side of the
> TmFifo interface.

> So to me this looks very different from a traditional virtio
> driver/device setup (which is what mic/vop uses).  I may be missing
> something, but I don't quite understand why it's even using virtio in
> the first place.

Thanks  Vincent! This appears to be very good summary of this driver
does on the tmfifo part and the difference between  mic/vop. The fifo is
accessed by register instead of shared memory.

The reason to use virtio framework is that it can be easily used to add
more virtual devices as needed without implementing driver details for
each one. For example, the device side supports console and networking
for now over the FIFO. It only needs to implement function 
tmfifo_virtio_rxtx() once to take care of the virtqueues Rx/Tx, which are
shared by all virtual devices. With minimum changes, we could easily add
another device over tmfifo, like a virtio block device, since the queue
handling is already there. 

The host side handles the virtqueues as well in rshim_net.c. It behaves
like a peer-to-peer to the device side while the tmfifo behaves like a
'wire' (transport) to pass data between the host and the device without
worrying about the data details.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* RE: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
  2018-10-26 20:33         ` Arnd Bergmann
  2018-10-29 16:48           ` Liming Sun
@ 2019-01-24 15:07           ` Liming Sun
  1 sibling, 0 replies; 70+ messages in thread
From: Liming Sun @ 2019-01-24 15:07 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: DTML, David Woods, Vadim Pasternak, arm-soc, Andy Shevchenko,
	Olof Johansson, Darren Hart, Robin Murphy, Linux ARM

Arnd,

According to the emails and discussions I saw recently, I think that your comments "Finally, drivers/platform/mellanox might be a reasonable choice, and it would let you keep both sides of the driver in one place." does make more sense.

I'll try to resubmit the changes under the drivers/platform/mellanox directory.

Thanks!
Liming

> -----Original Message-----
> From: Arnd Bergmann <arnd@arndb.de>
> Sent: Friday, October 26, 2018 4:34 PM
> To: Liming Sun <lsun@mellanox.com>
> Cc: Olof Johansson <olof@lixom.net>; David Woods <dwoods@mellanox.com>; Robin Murphy <robin.murphy@arm.com>; arm-soc
> <arm@kernel.org>; DTML <devicetree@vger.kernel.org>; Linux ARM <linux-arm-kernel@lists.infradead.org>
> Subject: Re: [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
> 
> On Fri, Oct 26, 2018 at 9:36 PM Liming Sun <lsun@mellanox.com> wrote:
> > > -----Original Message-----
> > > From: arndbergmann@gmail.com [mailto:arndbergmann@gmail.com] On
> > > > --- /dev/null
> > > > +++ b/Documentation/devicetree/bindings/soc/mellanox/tmfifo.txt
> > > > @@ -0,0 +1,23 @@
> > > > +* Mellanox BlueField SoC TmFifo
> > > > +
> > > > +BlueField TmFifo provides a shared FIFO between the target and the
> > > > +external host machine, which can be accessed by external host via
> > > > +USB or PCIe. In the current tmfifo driver, this FIFO has been demuxed
> > > > +to implement virtual console and network interface based on the virtio
> > > > +framework.
> > > > +
> > > > +Required properties:
> > > > +
> > > > +- compatible:      Should be "mellanox,bf-tmfifo"
> > > > +- reg:             Physical base address and length of Rx/Tx block
> > > > +- interrupts:      The interrupt number of Rx low water mark, Rx high water
> > > > mark
> > > > +           Tx low water mark, Tx high water mark respectively.
> > >
> > >
> > > This sounds like it might fit into the mailbox subsystem, and perhaps
> > > it should use the mailbox DT bindings. Have you had a look at that?
> >
> > This commit of dt-bindings is mainly to solve the warning of checkpatch.pl.
> > Like the response to patch 2/4, ACPI is actually used now instead of device tree.
> > The TMFIFO definition in the ACPI DSDT table would be something like below.
> >
> >     // RShim TMFIFO
> >     Device(RSH0) {
> >       Name(_HID, "MLNXBF01")
> >       Name(_UID, Zero)
> >       Name(_CCA, 1)
> >       Name(_CRS, ResourceTemplate() {
> >         Memory32Fixed(ReadWrite, 0x00800a20, 0x00000018)
> >         Memory32Fixed(ReadWrite, 0x00800a40, 0x00000018)
> >         Interrupt(ResourceConsumer, Edge, ActiveHigh, Exclusive)
> >           { BF1_RSH0_TM_HTT_LWM_INT,
> >             BF1_RSH0_TM_HTT_HWM_INT,
> >             BF1_RSH0_TM_TTH_LWM_INT,
> >             BF1_RSH0_TM_TTH_HWM_INT
> >           }
> >       })
> >     }
> >
> > Any suggestion how it should be added into Linux Documentation, or maybe I
> > should just remove this commit from this patch series?
> 
> Maybe the best way here would be to not use ACPI for the case
> where bluefin is integrated into a PCIe endpoint, since ACPI is
> not as flexible here and generally relies on having an SBSA
> compliant hardware that you no longer have if you require
> random platform devices for booting from and for your console.
> 
> For the case where a bluefin SoC is used in a standalone system,
> having ACPI makes more sense, as that lets you install Red Hat
> Linux or other operating systems that rely on SBBR and SBSA.
> 
> > As for the sub-component of this driver, the "soc" might be better fit than the mailbox
> > for some reasons. It's a communication between extern machines and the SoC via
> > USB / PCIe,  like pushing boot stream, console and network mgmt. Some of the features,
> > like pushing boot stream, doesn't communicate with the ARM core. The boot stream
> > is pushed to the SoC HW logic directly. I'll add the host-side virtio-based driver in patch v5.
> 
> Right, the drivers/mailbox subsystem was not the right idea here,
> I noticed that myself after actually reading the driver. Drivers/soc
> may also not be the best fit, since this is not really about it being
> a SoC, but rather a way to encapsulate virtual devices. The
> mic driver I mentioned is in drivers/misc, but I don't like to add stuff
> there if we can avoid it.
> 
> drivers/virtio, drivers/bus or drivers/mfd might also be an option that
> could fit better than drivers/soc, or you could have your own subdir
> below drivers/ as some others do. Finally, drivers/platform/mellanox
> might be a reasonable choice, and it would let you keep both sides
> of the driver in one place.
> 
>        Arnd
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver
  2019-01-21 19:17 ` [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver Liming Sun
@ 2019-02-18 13:24   ` Arnd Bergmann
  0 siblings, 0 replies; 70+ messages in thread
From: Arnd Bergmann @ 2019-02-18 13:24 UTC (permalink / raw)
  To: Liming Sun
  Cc: DTML, David Woods, arm-soc, Olof Johansson, Robin Murphy, Linux ARM

On Mon, Jan 21, 2019 at 8:18 PM Liming Sun <lsun@mellanox.com> wrote:
>
> Liming Sun (9):
>   soc: Add TmFifo driver for Mellanox BlueField Soc
>   arm64: Add Mellanox BlueField SoC config option
>   dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC
>   MAINTAINERS: Add entry for Mellanox Bluefield Soc

Could you send the arch/arm64 changes to soc@kernel.org for
inclusion? I'm not sure what the status of the rshim driver is now,
but we should probably merge the base support through the soc
tree.

     Arnd

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

* Re: [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc
  2019-01-03 19:17 ` [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
@ 2019-03-15 13:18   ` Matthias Brugger
  0 siblings, 0 replies; 70+ messages in thread
From: Matthias Brugger @ 2019-03-15 13:18 UTC (permalink / raw)
  To: Liming Sun, Olof Johansson, Arnd Bergmann, David Woods,
	Robin Murphy, arm-soc
  Cc: devicetree, linux-arm-kernel



On 03/01/2019 20:17, Liming Sun wrote:
> This commit adds the TmFifo driver for Mellanox BlueField Soc.
> TmFifo is a shared FIFO which enables external host machine to
> exchange data with the SoC via USB or PCIe. The driver is based on
> virtio framework and has console and network access enabled.
> 
> Reviewed-by: David Woods <dwoods@mellanox.com>
> Signed-off-by: Liming Sun <lsun@mellanox.com>
> ---
>  drivers/soc/Kconfig                |    1 +
>  drivers/soc/Makefile               |    1 +
>  drivers/soc/mellanox/Kconfig       |   18 +
>  drivers/soc/mellanox/Makefile      |    5 +
>  drivers/soc/mellanox/tmfifo.c      | 1244 ++++++++++++++++++++++++++++++++++++
>  drivers/soc/mellanox/tmfifo_regs.h |   76 +++
>  6 files changed, 1345 insertions(+)
>  create mode 100644 drivers/soc/mellanox/Kconfig
>  create mode 100644 drivers/soc/mellanox/Makefile
>  create mode 100644 drivers/soc/mellanox/tmfifo.c
>  create mode 100644 drivers/soc/mellanox/tmfifo_regs.h
> 
[..]
> diff --git a/drivers/soc/mellanox/tmfifo.c b/drivers/soc/mellanox/tmfifo.c
> new file mode 100644
> index 0000000..2975229
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo.c
[..]

> +
> +/* Console Tx buffer size. */
> +#define TMFIFO_CONS_TX_BUF_SIZE			(32 * 1024)
> +
> +/* House-keeping timer interval. */
> +static int tmfifo_timer_interval = HZ / 10;
> +module_param(tmfifo_timer_interval, int, 0644);
> +MODULE_PARM_DESC(tmfifo_timer_interval, "timer interval");
> +
> +/* Global lock. */
> +static DEFINE_MUTEX(tmfifo_lock);

Why do we need that? To synchronize between different tmfifo driver instances?

> +
> +/* Virtio ring size. */
> +static int tmfifo_vring_size = TMFIFO_VRING_SIZE;
> +module_param(tmfifo_vring_size, int, 0444);
> +MODULE_PARM_DESC(tmfifo_vring_size, "Size of the vring");
> +
> +/* Struct declaration. */
> +struct tmfifo;
> +
> +/* Virtual devices sharing the TM FIFO. */
> +#define TMFIFO_VDEV_MAX		(VIRTIO_ID_CONSOLE + 1)
> +
> +/* Structure to maintain the ring state. */
> +struct tmfifo_vring {
> +	void *va;			/* virtual address */
> +	dma_addr_t dma;			/* dma address */
> +	struct virtqueue *vq;		/* virtqueue pointer */
> +	struct vring_desc *desc;	/* current desc */
> +	struct vring_desc *desc_head;	/* current desc head */
> +	int cur_len;			/* processed len in current desc */
> +	int rem_len;			/* remaining length to be processed */
> +	int size;			/* vring size */
> +	int align;			/* vring alignment */
> +	int id;				/* vring id */
> +	int vdev_id;			/* TMFIFO_VDEV_xxx */
> +	u32 pkt_len;			/* packet total length */
> +	__virtio16 next_avail;		/* next avail desc id */
> +	struct tmfifo *fifo;		/* pointer back to the tmfifo */
> +};
> +
> +/* Interrupt types. */
> +enum {
> +	TM_RX_LWM_IRQ,			/* Rx low water mark irq */
> +	TM_RX_HWM_IRQ,			/* Rx high water mark irq */
> +	TM_TX_LWM_IRQ,			/* Tx low water mark irq */
> +	TM_TX_HWM_IRQ,			/* Tx high water mark irq */
> +	TM_IRQ_CNT
> +};
> +
> +/* Ring types (Rx & Tx). */
> +enum {
> +	TMFIFO_VRING_RX,		/* Rx ring */
> +	TMFIFO_VRING_TX,		/* Tx ring */
> +	TMFIFO_VRING_NUM
> +};
> +
> +struct tmfifo_vdev {
> +	struct virtio_device vdev;	/* virtual device */
> +	u8 status;
> +	u64 features;
> +	union {				/* virtio config space */
> +		struct virtio_console_config cons;
> +		struct virtio_net_config net;
> +	} config;
> +	struct tmfifo_vring vrings[TMFIFO_VRING_NUM];
> +	u8 *tx_buf;			/* tx buffer */
> +	u32 tx_head;			/* tx buffer head */
> +	u32 tx_tail;			/* tx buffer tail */
> +};
> +
> +struct tmfifo_irq_info {
> +	struct tmfifo *fifo;		/* tmfifo structure */
> +	int irq;			/* interrupt number */
> +	int index;			/* array index */
> +};
> +
> +/* TMFIFO device structure */
> +struct tmfifo {
> +	struct tmfifo_vdev *vdev[TMFIFO_VDEV_MAX];	/* virtual devices */
> +	struct platform_device *pdev;	/* platform device */
> +	struct mutex lock;

From what I understand we use this lock to syncronize between
tmfifo_create_vdev, tmfifo_delete_vdev and tmfifo_work_handler.

Create happens in probe and delete in remove, so we don't need a lock here.
So the only reason I can see we need this lock here is, to make sure that we
don't mess up between create a vdev and being already in the work handler. That
can only happen, if an IRQ was triggered. If we enable the IRQs after creating
the vdev, we don't need the lock at all.

> +	void __iomem *rx_base;		/* mapped register base */
> +	void __iomem *tx_base;		/* mapped register base */
> +	int tx_fifo_size;		/* number of entries of the Tx FIFO */
> +	int rx_fifo_size;		/* number of entries of the Rx FIFO */
> +	unsigned long pend_events;	/* pending bits for deferred process */
> +	struct tmfifo_irq_info irq_info[TM_IRQ_CNT];	/* irq info */
> +	struct work_struct work;	/* work struct for deferred process */
> +	struct timer_list timer;	/* keepalive timer */
> +	struct tmfifo_vring *vring[2];	/* current Tx/Rx ring */
> +	bool is_ready;			/* ready flag */
> +	spinlock_t spin_lock;		/* spin lock */
> +};
> +
> +union tmfifo_msg_hdr {
> +	struct {
> +		u8 type;		/* message type */
> +		__be16 len;		/* payload length */
> +		u8 unused[5];		/* reserved, set to 0 */
> +	} __packed;
> +	u64 data;
> +};
> +
> +/*
> + * Default MAC.
> + * This MAC address will be read from EFI persistent variable if configured.
> + * It can also be reconfigured with standard Linux tools.
> + */
> +static u8 tmfifo_net_default_mac[6] = {0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01};
> +
> +/* MTU setting of the virtio-net interface. */
> +#define TMFIFO_NET_MTU		1500
> +
> +/* Supported virtio-net features. */
> +#define TMFIFO_NET_FEATURES	((1UL << VIRTIO_NET_F_MTU) | \
> +				 (1UL << VIRTIO_NET_F_STATUS) | \
> +				 (1UL << VIRTIO_NET_F_MAC))
> +
> +/* Return the available Tx buffer space. */
> +static inline int tmfifo_vdev_tx_buf_avail(struct tmfifo_vdev *vdev)
> +{
> +	return ((vdev->tx_tail >= vdev->tx_head) ?
> +		(TMFIFO_CONS_TX_BUF_SIZE - 8 - (vdev->tx_tail -
> +		vdev->tx_head)) : (vdev->tx_head - vdev->tx_tail - 8));

Why do we need to subtract 8 from the available buffer size?

> +}
> +
> +/* Update Tx buffer pointer after pushing data. */
> +static inline void tmfifo_vdev_tx_buf_push(struct tmfifo_vdev *vdev, u32 len)
> +{
> +	vdev->tx_tail += len;
> +	if (vdev->tx_tail >= TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_tail -= TMFIFO_CONS_TX_BUF_SIZE;

I would have expected
vdev->tx_tail = (vdev->tx_tail + len) % TMFIFO_CONS_TX_BUF_SIZE;

But I suppose your code executes faster.
What I miss is some code to assure that no ring buffer overflow/underrun can happen.

> +}
> +
> +/* Update Tx buffer pointer after popping data. */
> +static inline void tmfifo_vdev_tx_buf_pop(struct tmfifo_vdev *vdev, u32 len)
> +{
> +	vdev->tx_head += len;
> +	if (vdev->tx_head >= TMFIFO_CONS_TX_BUF_SIZE)
> +		vdev->tx_head -= TMFIFO_CONS_TX_BUF_SIZE;
> +}
> +

[...]

> +
> +/* Rx & Tx processing of a virtual queue. */
> +static void tmfifo_virtio_rxtx(struct virtqueue *vq, bool is_rx)> +{
> +	struct tmfifo_vring *vring;
> +	struct tmfifo *fifo;
> +	struct vring *vr;
> +	struct virtio_device *vdev;
> +	u64 sts, data;
> +	int num_avail = 0, hdr_len, tx_reserve;
> +	void *addr;
> +	u32 len, idx;
> +	struct vring_desc *desc;
> +	unsigned long flags;
> +	struct tmfifo_vdev *cons;
> +
> +	if (!vq)
> +		return;
> +
> +	vring = (struct tmfifo_vring *)vq->priv;


You can pass strict tmfifo_vring* instead of virtqueue as function parameter,
then you don't have to do this.


> +	fifo = vring->fifo;
> +	vr = (struct vring *)virtqueue_get_vring(vq);
> +
> +	if (!fifo->vdev[vring->vdev_id])
> +		return;
> +	vdev = &fifo->vdev[vring->vdev_id]->vdev;
> +	cons = fifo->vdev[VIRTIO_ID_CONSOLE];
> +
> +	/* Don't continue if another vring is running. */
> +	if (fifo->vring[is_rx] != NULL && fifo->vring[is_rx] != vring)

How can that happen?

> +		return;
> +
> +	/* tx_reserve is used to reserved some room in FIFO for console. */
> +	if (vring->vdev_id == VIRTIO_ID_NET) {
> +		hdr_len = sizeof(struct virtio_net_hdr);
> +		tx_reserve = fifo->tx_fifo_size / 16;
> +	} else {
> +		BUG_ON(vring->vdev_id != VIRTIO_ID_CONSOLE);
> +		hdr_len = 0;
> +		tx_reserve = 1;
> +	}
> +
> +	desc = vring->desc;
> +
[...]
> +
> +/* Work handler for Rx, Tx or activity monitoring. */
> +static void tmfifo_work_handler(struct work_struct *work)
> +{
> +	int i;
> +	struct tmfifo_vdev *tm_vdev;
> +	struct tmfifo *fifo = container_of(work, struct tmfifo, work);
> +
> +	if (!fifo->is_ready)
> +		return;
> +
> +	mutex_lock(&fifo->lock);
> +

So you don't want to queue up more work when remove is called. As is_ready is
not atomic you could deadlock here:
  remove             work_handler
 mutex_lock
                    if(!is_ready)
                   mutex_lock <- sleeps
is_ready = false
    ...
cancel_work_sync <- deadlock

> +	/* Tx. */
> +	if (test_and_clear_bit(TM_TX_LWM_IRQ, &fifo->pend_events) &&
> +		       fifo->irq_info[TM_TX_LWM_IRQ].irq) {
> +		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				tmfifo_virtio_rxtx(
> +					tm_vdev->vrings[TMFIFO_VRING_TX].vq,
> +					false);
> +			}
> +		}
> +	}
> +
> +	/* Rx. */
> +	if (test_and_clear_bit(TM_RX_HWM_IRQ, &fifo->pend_events) &&
> +		       fifo->irq_info[TM_RX_HWM_IRQ].irq) {
> +		for (i = 0; i < TMFIFO_VDEV_MAX; i++) {
> +			tm_vdev = fifo->vdev[i];
> +			if (tm_vdev != NULL) {
> +				tmfifo_virtio_rxtx(
> +					tm_vdev->vrings[TMFIFO_VRING_RX].vq,
> +					true);
> +			}
> +		}
> +	}
> +
> +	mutex_unlock(&fifo->lock);
> +}
[...]

> +
> +/* Probe the TMFIFO. */
> +static int tmfifo_probe(struct platform_device *pdev)
> +{
> +	u64 ctl;
> +	struct tmfifo *fifo;
> +	struct resource *rx_res, *tx_res;
> +	struct virtio_net_config net_config;
> +	int i, ret;
> +
> +	/* Get the resource of the Rx & Tx FIFO. */
> +	rx_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	tx_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
> +	if (!rx_res || !tx_res) {
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	if (request_mem_region(rx_res->start,
> +			       resource_size(rx_res), "bf-tmfifo") == NULL) {
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	if (request_mem_region(tx_res->start,
> +			       resource_size(tx_res), "bf-tmfifo") == NULL) {

Can't we use devm_request_mem_region and get rid of the release_mem_region here
and in the remove function?

Regards,
Matthias

> +		release_mem_region(rx_res->start, resource_size(rx_res));
> +		ret = -EBUSY;
> +		goto early_err;
> +	}
> +
> +	ret = -ENOMEM;
> +	fifo = kzalloc(sizeof(struct tmfifo), GFP_KERNEL);
> +	if (!fifo)
> +		goto err;
> +
> +	fifo->pdev = pdev;
> +	platform_set_drvdata(pdev, fifo);
> +
> +	spin_lock_init(&fifo->spin_lock);
> +	INIT_WORK(&fifo->work, tmfifo_work_handler);
> +
> +	timer_setup(&fifo->timer, tmfifo_timer, 0);
> +	fifo->timer.function = tmfifo_timer;
> +
> +	for (i = 0; i < TM_IRQ_CNT; i++) {
> +		fifo->irq_info[i].index = i;
> +		fifo->irq_info[i].fifo = fifo;
> +		fifo->irq_info[i].irq = platform_get_irq(pdev, i);
> +		ret = request_irq(fifo->irq_info[i].irq, tmfifo_irq_handler, 0,
> +				  "tmfifo", &fifo->irq_info[i]);
> +		if (ret) {
> +			pr_err("Unable to request irq\n");
> +			fifo->irq_info[i].irq = 0;
> +			goto err;
> +		}
> +	}
> +
> +	fifo->rx_base = ioremap(rx_res->start, resource_size(rx_res));
> +	if (!fifo->rx_base)
> +		goto err;
> +
> +	fifo->tx_base = ioremap(tx_res->start, resource_size(tx_res));
> +	if (!fifo->tx_base)
> +		goto err;
> +
> +	/* Get Tx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->tx_base + TMFIFO_TX_CTL);
> +	fifo->tx_fifo_size =
> +		FIELD_GET(TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
> +	ctl = (ctl & ~TMFIFO_TX_CTL__LWM_MASK) |
> +		FIELD_PREP(TMFIFO_TX_CTL__LWM_MASK, fifo->tx_fifo_size / 2);
> +	ctl = (ctl & ~TMFIFO_TX_CTL__HWM_MASK) |
> +		FIELD_PREP(TMFIFO_TX_CTL__HWM_MASK, fifo->tx_fifo_size - 1);
> +	writeq(ctl, fifo->tx_base + TMFIFO_TX_CTL);
> +
> +	/* Get Rx FIFO size and set the low/high watermark. */
> +	ctl = readq(fifo->rx_base + TMFIFO_RX_CTL);
> +	fifo->rx_fifo_size =
> +		FIELD_GET(TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
> +	ctl = (ctl & ~TMFIFO_RX_CTL__LWM_MASK) |
> +		FIELD_PREP(TMFIFO_RX_CTL__LWM_MASK, 0);
> +	ctl = (ctl & ~TMFIFO_RX_CTL__HWM_MASK) |
> +		FIELD_PREP(TMFIFO_RX_CTL__HWM_MASK, 1);
> +	writeq(ctl, fifo->rx_base + TMFIFO_RX_CTL);
> +
> +	mutex_init(&fifo->lock);
> +
> +	/* Create the console vdev. */
> +	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
> +	if (ret)
> +		goto err;
> +
> +	/* Create the network vdev. */
> +	memset(&net_config, 0, sizeof(net_config));
> +	net_config.mtu = TMFIFO_NET_MTU;
> +	net_config.status = VIRTIO_NET_S_LINK_UP;
> +	memcpy(net_config.mac, tmfifo_net_default_mac, 6);
> +	tmfifo_get_cfg_mac(net_config.mac);
> +	ret = tmfifo_create_vdev(fifo, VIRTIO_ID_NET, TMFIFO_NET_FEATURES,
> +				 &net_config, sizeof(net_config));
> +	if (ret)
> +		goto err;
> +
> +	mod_timer(&fifo->timer, jiffies + tmfifo_timer_interval);
> +
> +	fifo->is_ready = true;
> +
> +	return 0;
> +
> +err:
> +	tmfifo_remove(pdev);
> +early_err:
> +	dev_err(&pdev->dev, "Probe Failed\n");
> +	return ret;
> +}
> +
> +static const struct of_device_id tmfifo_match[] = {
> +	{ .compatible = "mellanox,bf-tmfifo" },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, tmfifo_match);
> +
> +static const struct acpi_device_id bf_tmfifo_acpi_match[] = {
> +	{ "MLNXBF01", 0 },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, bf_tmfifo_acpi_match);
> +
> +static struct platform_driver tmfifo_driver = {
> +	.probe = tmfifo_probe,
> +	.remove = tmfifo_remove,
> +	.driver = {
> +		.name = "bf-tmfifo",
> +		.of_match_table = tmfifo_match,
> +		.acpi_match_table = ACPI_PTR(bf_tmfifo_acpi_match),
> +	},
> +};
> +
> +static int __init tmfifo_init(void)
> +{
> +	int ret;
> +
> +	ret = platform_driver_register(&tmfifo_driver);
> +	if (ret)
> +		pr_err("Failed to register tmfifo driver.\n");
> +
> +	return ret;
> +}
> +
> +static void __exit tmfifo_exit(void)
> +{
> +	platform_driver_unregister(&tmfifo_driver);
> +}
> +
> +module_init(tmfifo_init);
> +module_exit(tmfifo_exit);
> +
> +MODULE_DESCRIPTION("Mellanox BlueField SoC TMFIFO Driver");
> +MODULE_LICENSE("GPL v2");
> +MODULE_AUTHOR("Mellanox Technologies");
> diff --git a/drivers/soc/mellanox/tmfifo_regs.h b/drivers/soc/mellanox/tmfifo_regs.h
> new file mode 100644
> index 0000000..9f21764
> --- /dev/null
> +++ b/drivers/soc/mellanox/tmfifo_regs.h
> @@ -0,0 +1,76 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 and
> + * only version 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#ifndef __TMFIFO_REGS_H__
> +#define __TMFIFO_REGS_H__
> +
> +#include <linux/types.h>
> +
> +#define TMFIFO_TX_DATA 0x0
> +
> +#define TMFIFO_TX_STS 0x8
> +#define TMFIFO_TX_STS__LENGTH 0x0001
> +#define TMFIFO_TX_STS__COUNT_SHIFT 0
> +#define TMFIFO_TX_STS__COUNT_WIDTH 9
> +#define TMFIFO_TX_STS__COUNT_RESET_VAL 0
> +#define TMFIFO_TX_STS__COUNT_RMASK 0x1ff
> +#define TMFIFO_TX_STS__COUNT_MASK  0x1ff
> +
> +#define TMFIFO_TX_CTL 0x10
> +#define TMFIFO_TX_CTL__LENGTH 0x0001
> +#define TMFIFO_TX_CTL__LWM_SHIFT 0
> +#define TMFIFO_TX_CTL__LWM_WIDTH 8
> +#define TMFIFO_TX_CTL__LWM_RESET_VAL 128
> +#define TMFIFO_TX_CTL__LWM_RMASK 0xff
> +#define TMFIFO_TX_CTL__LWM_MASK  0xff
> +#define TMFIFO_TX_CTL__HWM_SHIFT 8
> +#define TMFIFO_TX_CTL__HWM_WIDTH 8
> +#define TMFIFO_TX_CTL__HWM_RESET_VAL 128
> +#define TMFIFO_TX_CTL__HWM_RMASK 0xff
> +#define TMFIFO_TX_CTL__HWM_MASK  0xff00
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define TMFIFO_TX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#define TMFIFO_RX_DATA 0x0
> +
> +#define TMFIFO_RX_STS 0x8
> +#define TMFIFO_RX_STS__LENGTH 0x0001
> +#define TMFIFO_RX_STS__COUNT_SHIFT 0
> +#define TMFIFO_RX_STS__COUNT_WIDTH 9
> +#define TMFIFO_RX_STS__COUNT_RESET_VAL 0
> +#define TMFIFO_RX_STS__COUNT_RMASK 0x1ff
> +#define TMFIFO_RX_STS__COUNT_MASK  0x1ff
> +
> +#define TMFIFO_RX_CTL 0x10
> +#define TMFIFO_RX_CTL__LENGTH 0x0001
> +#define TMFIFO_RX_CTL__LWM_SHIFT 0
> +#define TMFIFO_RX_CTL__LWM_WIDTH 8
> +#define TMFIFO_RX_CTL__LWM_RESET_VAL 128
> +#define TMFIFO_RX_CTL__LWM_RMASK 0xff
> +#define TMFIFO_RX_CTL__LWM_MASK  0xff
> +#define TMFIFO_RX_CTL__HWM_SHIFT 8
> +#define TMFIFO_RX_CTL__HWM_WIDTH 8
> +#define TMFIFO_RX_CTL__HWM_RESET_VAL 128
> +#define TMFIFO_RX_CTL__HWM_RMASK 0xff
> +#define TMFIFO_RX_CTL__HWM_MASK  0xff00
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_RMASK 0x1ff
> +#define TMFIFO_RX_CTL__MAX_ENTRIES_MASK  0x1ff00000000ULL
> +
> +#endif /* !defined(__TMFIFO_REGS_H__) */
> 

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 70+ messages in thread

end of thread, other threads:[~2019-03-15 13:18 UTC | newest]

Thread overview: 70+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-25 16:06 [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-05-25 16:06 ` [PATCH v1 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-05-25 16:06 ` [PATCH v1 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-05-25 16:06 ` [PATCH v1 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-05-25 17:14 ` [PATCH v1 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Robin Murphy
2018-05-25 20:18   ` Liming Sun
2018-05-25 20:17 ` [PATCH v2 " Liming Sun
2018-05-25 20:17   ` [PATCH v2 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-05-25 20:17   ` [PATCH v2 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-05-31  3:43     ` Rob Herring
2018-06-01 14:31       ` Liming Sun
2018-05-25 20:17   ` [PATCH v2 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-06-01 14:31 ` [PATCH v3 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-06-01 14:31   ` [PATCH v3 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-06-01 14:31   ` [PATCH v3 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-06-11 18:19     ` Rob Herring
2018-06-01 14:31   ` [PATCH v3 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-10-24 17:55 ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-10-24 17:55   ` [PATCH v4 2/4] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-10-25 15:38     ` Arnd Bergmann
2018-10-26 19:18       ` Liming Sun
2018-10-26 19:32         ` Arnd Bergmann
2018-10-29 14:58           ` Liming Sun
2018-10-29 15:26             ` Arnd Bergmann
2018-10-29 16:09               ` Liming Sun
2018-10-24 17:55   ` [PATCH v4 3/4] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-10-25 15:32     ` Arnd Bergmann
2018-10-26 19:36       ` Liming Sun
2018-10-26 20:33         ` Arnd Bergmann
2018-10-29 16:48           ` Liming Sun
2019-01-24 15:07           ` Liming Sun
2018-10-24 17:55   ` [PATCH v4 4/4] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-10-25 15:57   ` [PATCH v4 1/4] soc: Add TmFifo driver for Mellanox BlueField Soc Arnd Bergmann
2018-10-26 18:24     ` Liming Sun
2018-10-26 18:35       ` Arnd Bergmann
2018-10-29 14:17         ` Liming Sun
2018-10-29 14:52           ` Arnd Bergmann
2018-12-04 22:12     ` Liming Sun
2018-10-31 18:09 ` [PATCH v5 1/5] " Liming Sun
2018-10-31 18:09   ` [PATCH v5 2/5] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-10-31 18:09   ` [PATCH v5 3/5] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-10-31 18:09   ` [PATCH v5 4/5] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-11-01 16:23 ` [PATCH v6 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2018-12-12 23:07   ` Matthias Brugger
2019-01-03 19:20     ` Liming Sun
2018-11-01 16:25 ` Liming Sun
2018-11-01 16:25   ` [PATCH v6 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
2018-11-01 16:25   ` [PATCH v6 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2018-11-01 16:25   ` [PATCH v6 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2018-11-01 16:25   ` [PATCH v6 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
2019-01-18 16:02     ` Arnd Bergmann
2019-01-21 19:22       ` Liming Sun
2019-01-22 12:20       ` Vincent Whitchurch
     [not found]         ` <DB6PR05MB32235BE3E5EBCA60ABDEDB13A1980@DB6PR05MB3223.eurprd05.prod.outlook.com>
2019-01-22 13:36           ` Liming Sun
2018-11-01 16:25   ` [PATCH v6 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
2018-11-01 16:25   ` [PATCH v6 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
2018-11-01 16:25   ` [PATCH v6 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
2018-11-01 16:25   ` [PATCH v6 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
2019-01-03 19:17 ` [PATCH v7 1/9] soc: Add TmFifo driver for Mellanox BlueField Soc Liming Sun
2019-03-15 13:18   ` Matthias Brugger
2019-01-03 19:17 ` [PATCH v7 2/9] arm64: Add Mellanox BlueField SoC config option Liming Sun
2019-01-03 19:17 ` [PATCH v7 3/9] dt-bindings: soc: Add TmFifo binding for Mellanox BlueField SoC Liming Sun
2019-01-03 19:17 ` [PATCH v7 4/9] MAINTAINERS: Add entry for Mellanox Bluefield Soc Liming Sun
2019-01-03 19:17 ` [PATCH v7 5/9] soc: mellanox: host: Add the common host side Rshim driver Liming Sun
2019-01-03 19:17 ` [PATCH v7 6/9] soc: mellanox: host: Add networking support over Rshim Liming Sun
2019-01-03 19:17 ` [PATCH v7 7/9] soc: mellanox: host: Add the Rshim USB backend driver Liming Sun
2019-01-03 19:17 ` [PATCH v7 8/9] soc: mellanox: host: Add the Rshim PCIe " Liming Sun
2019-01-03 19:17 ` [PATCH v7 9/9] soc: mellanox: host: Add the Rshim PCIe live-fish " Liming Sun
2019-01-21 19:17 ` [PATCH v7 0/9] Mellanox BlueField ARM SoC Rshim driver Liming Sun
2019-02-18 13:24   ` Arnd Bergmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).