From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, patrick.fu@intel.com, amorenoz@redhat.com
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [dpdk-dev] [PATCH 7/7] net/virtio: introduce Vhost-vDPA backend
Date: Fri, 11 Sep 2020 17:08:05 +0200	[thread overview]
Message-ID: <20200911150805.79901-8-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20200911150805.79901-1-maxime.coquelin@redhat.com>

vhost-vDPA is a new virtio backend type introduced by the vDPA kernel
framework, which provides an abstraction of vDPA devices and
exposes a unified control interface through a char device.

This patch adds support for the vhost-vDPA backend. As with the
existing vhost-kernel backend, a set of virtio_user ops is
introduced for the vhost-vDPA backend to handle device-specific
operations such as:
 - device setup
 - ioctl message handling
 - queue pair enabling
 - dma map/unmap
The vDPA-relevant ioctl codes and data structures are also defined in
this patch. A minimal standalone sketch of the resulting control path
is included below for illustration.
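
For illustration only (not part of the patch), here is a minimal,
self-contained sketch of the control path the new backend relies on:
open the vhost-vDPA char dev, check that it exposes a virtio-net
device, query its features and push one IOTLB mapping. The
/dev/vhost-vdpa-0 path is just an example, and a real backend may need
additional negotiation that is omitted here; the ioctl codes and
message layout mirror the definitions added in vhost_vdpa.c below.

/*
 * Minimal sketch (illustration only): talk to a vhost-vDPA char dev
 * directly. Error handling and cleanup are reduced to the bare minimum.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Subset of the ioctls/definitions also added in vhost_vdpa.c */
#define VHOST_VIRTIO			0xAF
#define VHOST_GET_FEATURES		_IOR(VHOST_VIRTIO, 0x00, __u64)
#define VHOST_SET_OWNER			_IO(VHOST_VIRTIO, 0x01)
#define VHOST_VDPA_GET_DEVICE_ID	_IOR(VHOST_VIRTIO, 0x70, __u32)

#define VIRTIO_ID_NETWORK	1
#define VHOST_IOTLB_MSG_V2	0x2
#define VHOST_IOTLB_UPDATE	2
#define VHOST_ACCESS_RW		0x3

struct vhost_iotlb_msg {
	uint64_t iova;
	uint64_t size;
	uint64_t uaddr;
	uint8_t perm;
	uint8_t type;
};

struct vhost_msg {
	uint32_t type;
	uint32_t reserved;
	union {
		struct vhost_iotlb_msg iotlb;
		uint8_t padding[64];
	};
};

int main(void)
{
	static uint8_t buf[4096];	/* memory region to expose to the device */
	uint64_t features = 0;
	uint32_t did = 0;
	struct vhost_msg msg;
	int fd;

	fd = open("/dev/vhost-vdpa-0", O_RDWR);	/* example path */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* The virtio-user PMD only accepts virtio-net vDPA devices. */
	if (ioctl(fd, VHOST_VDPA_GET_DEVICE_ID, &did) < 0 ||
			did != VIRTIO_ID_NETWORK) {
		fprintf(stderr, "not a vDPA net device (id %u)\n", did);
		goto err;
	}

	if (ioctl(fd, VHOST_SET_OWNER) < 0 ||
			ioctl(fd, VHOST_GET_FEATURES, &features) < 0)
		goto err;
	printf("device features: 0x%" PRIx64 "\n", features);

	/* DMA map: IOTLB updates are plain write()s of struct vhost_msg. */
	memset(&msg, 0, sizeof(msg));
	msg.type = VHOST_IOTLB_MSG_V2;
	msg.iotlb.type = VHOST_IOTLB_UPDATE;
	msg.iotlb.iova = (uintptr_t)buf;	/* identity mapping, for the sketch */
	msg.iotlb.uaddr = (uintptr_t)buf;
	msg.iotlb.size = sizeof(buf);
	msg.iotlb.perm = VHOST_ACCESS_RW;
	if (write(fd, &msg, sizeof(msg)) != (ssize_t)sizeof(msg))
		goto err;

	close(fd);
	return 0;
err:
	fprintf(stderr, "vhost-vdpa request failed: %s\n", strerror(errno));
	close(fd);
	return 1;
}

With such a char dev present, the virtio-user PMD selects this backend
from the character device given in its 'path' argument (see
virtio_user_backend_type() in the virtio_user_dev.c hunk below).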

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 drivers/net/virtio/meson.build                |   1 +
 drivers/net/virtio/virtio_user/vhost.h        |   1 +
 drivers/net/virtio/virtio_user/vhost_vdpa.c   | 310 ++++++++++++++++++
 .../net/virtio/virtio_user/virtio_user_dev.c  |   9 +-
 4 files changed, 320 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/virtio/virtio_user/vhost_vdpa.c

diff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build
index 3fd6051f4b..eaed46373d 100644
--- a/drivers/net/virtio/meson.build
+++ b/drivers/net/virtio/meson.build
@@ -42,6 +42,7 @@ if is_linux
 		'virtio_user/vhost_kernel.c',
 		'virtio_user/vhost_kernel_tap.c',
 		'virtio_user/vhost_user.c',
+		'virtio_user/vhost_vdpa.c',
 		'virtio_user/virtio_user_dev.c')
 	deps += ['bus_vdev']
 endif
diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h
index 2e71995a79..210a3704e7 100644
--- a/drivers/net/virtio/virtio_user/vhost.h
+++ b/drivers/net/virtio/virtio_user/vhost.h
@@ -113,5 +113,6 @@ struct virtio_user_backend_ops {
 
 extern struct virtio_user_backend_ops virtio_ops_user;
 extern struct virtio_user_backend_ops virtio_ops_kernel;
+extern struct virtio_user_backend_ops virtio_ops_vdpa;
 
 #endif
diff --git a/drivers/net/virtio/virtio_user/vhost_vdpa.c b/drivers/net/virtio/virtio_user/vhost_vdpa.c
new file mode 100644
index 0000000000..d959bca0a1
--- /dev/null
+++ b/drivers/net/virtio/virtio_user/vhost_vdpa.c
@@ -0,0 +1,310 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Red Hat Inc.
+ */
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <rte_memory.h>
+
+#include "vhost.h"
+#include "virtio_user_dev.h"
+
+struct vhost_memory_vdpa {
+	uint32_t nregions;
+	uint32_t padding;
+	struct vhost_memory_region regions[0];
+};
+
+/* vhost kernel & vdpa ioctls */
+#define VHOST_VIRTIO 0xAF
+#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_vdpa)
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
+#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
+#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
+#define VHOST_VDPA_SET_VRING_ENABLE	_IOW(VHOST_VIRTIO, 0x75, \
+					     struct vhost_vring_state)
+
+static uint64_t vhost_req_user_to_vdpa[] = {
+	[VHOST_USER_SET_OWNER] = VHOST_SET_OWNER,
+	[VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER,
+	[VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES,
+	[VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES,
+	[VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL,
+	[VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM,
+	[VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE,
+	[VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE,
+	[VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR,
+	[VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK,
+	[VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
+	[VHOST_USER_SET_STATUS] = VHOST_VDPA_SET_STATUS,
+	[VHOST_USER_GET_STATUS] = VHOST_VDPA_GET_STATUS,
+	[VHOST_USER_SET_VRING_ENABLE] = VHOST_VDPA_SET_VRING_ENABLE,
+};
+
+/* no alignment requirement */
+struct vhost_iotlb_msg {
+	uint64_t iova;
+	uint64_t size;
+	uint64_t uaddr;
+#define VHOST_ACCESS_RO      0x1
+#define VHOST_ACCESS_WO      0x2
+#define VHOST_ACCESS_RW      0x3
+	uint8_t perm;
+#define VHOST_IOTLB_MISS           1
+#define VHOST_IOTLB_UPDATE         2
+#define VHOST_IOTLB_INVALIDATE     3
+#define VHOST_IOTLB_ACCESS_FAIL    4
+	uint8_t type;
+};
+
+#define VHOST_IOTLB_MSG_V2 0x2
+
+struct vhost_msg {
+	uint32_t type;
+	uint32_t reserved;
+	union {
+		struct vhost_iotlb_msg iotlb;
+		uint8_t padding[64];
+	};
+};
+
+static int
+vhost_vdpa_dma_map(struct virtio_user_dev *dev, void *addr,
+				  uint64_t iova, size_t len)
+{
+	struct vhost_msg msg = {};
+
+	msg.type = VHOST_IOTLB_MSG_V2;
+	msg.iotlb.type = VHOST_IOTLB_UPDATE;
+	msg.iotlb.iova = iova;
+	msg.iotlb.uaddr = (uint64_t)(uintptr_t)addr;
+	msg.iotlb.size = len;
+	msg.iotlb.perm = VHOST_ACCESS_RW;
+
+	if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
+		PMD_DRV_LOG(ERR, "Failed to send IOTLB update (%s)",
+				strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+vhost_vdpa_dma_unmap(struct virtio_user_dev *dev, void __rte_unused *addr,
+				  uint64_t iova, size_t len)
+{
+	struct vhost_msg msg = {};
+
+	msg.type = VHOST_IOTLB_MSG_V2;
+	msg.iotlb.type = VHOST_IOTLB_INVALIDATE;
+	msg.iotlb.iova = iova;
+	msg.iotlb.size = len;
+
+	if (write(dev->vhostfd, &msg, sizeof(msg)) != sizeof(msg)) {
+		PMD_DRV_LOG(ERR, "Failed to send IOTLB invalidate (%s)",
+				strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+
+static int
+vhost_vdpa_map_contig(const struct rte_memseg_list *msl,
+		const struct rte_memseg *ms, size_t len, void *arg)
+{
+	struct virtio_user_dev *dev = arg;
+
+	if (msl->external)
+		return 0;
+
+	return vhost_vdpa_dma_map(dev, ms->addr, ms->iova, len);
+}
+
+static int
+vhost_vdpa_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+		void *arg)
+{
+	struct virtio_user_dev *dev = arg;
+
+	/* skip external memory that isn't a heap */
+	if (msl->external && !msl->heap)
+		return 0;
+
+	/* skip any segments with invalid IOVA addresses */
+	if (ms->iova == RTE_BAD_IOVA)
+		return 0;
+
+	/* if IOVA mode is VA, we've already mapped the internal segments */
+	if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
+		return 0;
+
+	return vhost_vdpa_dma_map(dev, ms->addr, ms->iova, ms->len);
+}
+
+static int
+vhost_vdpa_dma_map_all(struct virtio_user_dev *dev)
+{
+	vhost_vdpa_dma_unmap(dev, 0, 0ULL, 0ULL - 1);
+
+	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+		/* with IOVA as VA mode, we can get away with mapping contiguous
+		 * chunks rather than going page-by-page.
+		 */
+		int ret = rte_memseg_contig_walk_thread_unsafe(
+				vhost_vdpa_map_contig, dev);
+		if (ret)
+			return ret;
+		/* we have to continue the walk because we've skipped the
+		 * external segments during the config walk.
+		 */
+	}
+	return rte_memseg_walk_thread_unsafe(vhost_vdpa_map, dev);
+}
+
+/* With the features below, vhost-vDPA does not need to do checksum and TSO:
+ * this info will be passed to virtio_user through the virtio net header.
+ */
+#define VHOST_VDPA_GUEST_OFFLOADS_MASK	\
+	((1ULL << VIRTIO_NET_F_GUEST_CSUM) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_TSO4) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_TSO6) |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |	\
+	 (1ULL << VIRTIO_NET_F_GUEST_UFO))
+
+#define VHOST_VDPA_HOST_OFFLOADS_MASK		\
+	((1ULL << VIRTIO_NET_F_HOST_TSO4) |	\
+	 (1ULL << VIRTIO_NET_F_HOST_TSO6) |	\
+	 (1ULL << VIRTIO_NET_F_CSUM))
+
+static int
+vhost_vdpa_ioctl(struct virtio_user_dev *dev,
+		   enum vhost_user_request req,
+		   void *arg)
+{
+	int ret = -1;
+	uint64_t req_vdpa;
+	struct vhost_memory_vdpa *vm = NULL;
+
+	PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
+
+	req_vdpa = vhost_req_user_to_vdpa[req];
+
+	if (req_vdpa == VHOST_SET_MEM_TABLE)
+		return vhost_vdpa_dma_map_all(dev);
+
+	if (req_vdpa == VHOST_SET_FEATURES) {
+		/* WORKAROUND */
+		*(uint64_t *)arg |= 1ULL << VIRTIO_F_IOMMU_PLATFORM;
+
+		/* Multiqueue not supported for now */
+		*(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ);
+	}
+
+	switch (req_vdpa) {
+	case VHOST_SET_VRING_NUM:
+	case VHOST_SET_VRING_ADDR:
+	case VHOST_SET_VRING_BASE:
+	case VHOST_GET_VRING_BASE:
+	case VHOST_SET_VRING_KICK:
+	case VHOST_SET_VRING_CALL:
+		*(unsigned int *)arg = *(unsigned int *)arg;
+		PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u",
+			    dev->vhostfd, *(unsigned int *)arg);
+		break;
+	default:
+		break;
+	}
+
+	ret = ioctl(dev->vhostfd, req_vdpa, arg);
+
+	if (vm)
+		free(vm);
+
+	if (ret < 0)
+		PMD_DRV_LOG(ERR, "%s failed: %s",
+			    vhost_msg_strings[req], strerror(errno));
+
+	return ret;
+}
+
+/**
+ * Set up the environment to talk to a vhost-vDPA backend.
+ *
+ * @return
+ *   - (-1) if failed to set up;
+ *   - (>=0) if successful.
+ */
+static int
+vhost_vdpa_setup(struct virtio_user_dev *dev)
+{
+	uint32_t did = (uint32_t)-1;
+
+	dev->vhostfd = open(dev->path, O_RDWR);
+	if (dev->vhostfd < 0) {
+		PMD_DRV_LOG(ERR, "Failed to open %s: %s\n",
+				dev->path, strerror(errno));
+		return -1;
+	}
+
+	if (ioctl(dev->vhostfd, VHOST_VDPA_GET_DEVICE_ID, &did) < 0 ||
+			did != VIRTIO_ID_NETWORK) {
+		PMD_DRV_LOG(ERR, "Invalid vdpa device ID: %u\n", did);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+vhost_vdpa_enable_queue_pair(struct virtio_user_dev *dev,
+			       uint16_t pair_idx,
+			       int enable)
+{
+	int i;
+
+	if (dev->qp_enabled[pair_idx] == enable)
+		return 0;
+
+	for (i = 0; i < 2; ++i) {
+		struct vhost_vring_state state = {
+			.index = pair_idx * 2 + i,
+			.num   = enable,
+		};
+
+		if (vhost_vdpa_ioctl(dev, VHOST_USER_SET_VRING_ENABLE, &state))
+			return -1;
+	}
+
+	dev->qp_enabled[pair_idx] = enable;
+
+	return 0;
+}
+
+struct virtio_user_backend_ops virtio_ops_vdpa = {
+	.setup = vhost_vdpa_setup,
+	.send_request = vhost_vdpa_ioctl,
+	.enable_qp = vhost_vdpa_enable_queue_pair,
+	.dma_map = vhost_vdpa_dma_map,
+	.dma_unmap = vhost_vdpa_dma_unmap,
+};
diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
index 2e097a95ea..2e8311147b 100644
--- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
+++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
@@ -444,6 +444,12 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
 				dev->vhostfds[q] = -1;
 				dev->tapfds[q] = -1;
 			}
+		} else if (dev->backend_type ==
+				VIRTIO_USER_BACKEND_VHOST_VDPA) {
+			dev->ops = &virtio_ops_vdpa;
+		} else {
+			PMD_DRV_LOG(ERR, "Unknown backend type");
+			return -1;
 		}
 	}
 
@@ -878,7 +884,8 @@ virtio_user_update_status(struct virtio_user_dev *dev)
 	enum virtio_user_backend_type backend_type =
 				virtio_user_backend_type(dev->path);
 
-	if (backend_type != VIRTIO_USER_BACKEND_VHOST_USER)
+	if (backend_type != VIRTIO_USER_BACKEND_VHOST_USER &&
+			backend_type != VIRTIO_USER_BACKEND_VHOST_VDPA)
 		return 0;
 
 	err = dev->ops->send_request(dev, VHOST_USER_GET_STATUS, &ret);
-- 
2.26.2



Thread overview: 16+ messages
2020-09-11 15:07 [dpdk-dev] [PATCH 0/7]virtio-user: introduce vhost-vdpa backend Maxime Coquelin
2020-09-11 15:07 ` [dpdk-dev] [PATCH 1/7] bus/vdev: add DMA mapping ops Maxime Coquelin
2020-09-24  5:25   ` Xia, Chenbo
2020-09-24  7:40     ` Maxime Coquelin
2020-09-11 15:08 ` [dpdk-dev] [PATCH 2/7] net/virtio: introduce DMA ops Maxime Coquelin
2020-09-11 15:08 ` [dpdk-dev] [PATCH 3/7] net/virtio: introduce Vhost-vDPA backend type Maxime Coquelin
2020-09-11 15:08 ` [dpdk-dev] [PATCH 4/7] net/virtio: adapt Virtio-user status size Maxime Coquelin
2020-09-24  5:25   ` Xia, Chenbo
2020-09-24  8:05     ` Maxime Coquelin
2020-09-11 15:08 ` [dpdk-dev] [PATCH 5/7] net/virtio: check protocol feature in user backend Maxime Coquelin
2020-09-11 15:08 ` [dpdk-dev] [PATCH 6/7] net/virtio: split virtio-user start Maxime Coquelin
2020-09-11 15:08 ` Maxime Coquelin [this message]
2020-09-24  5:25   ` [dpdk-dev] [PATCH 7/7] net/virtio: introduce Vhost-vDPA backend Xia, Chenbo
2020-09-24  5:43     ` Fu, Patrick
2020-09-24  5:52       ` Xia, Chenbo
2020-09-24  8:07     ` Maxime Coquelin
