All of lore.kernel.org
 help / color / mirror / Atom feed
From: Maxime Coquelin <maxime.coquelin@redhat.com>
To: dev@dpdk.org, remy.horton@intel.com, tiwei.bie@intel.com,
	yliu@fridaylinux.org
Cc: mst@redhat.com, jfreiman@redhat.com, vkaplans@redhat.com,
	jasowang@redhat.com, Maxime Coquelin <maxime.coquelin@redhat.com>
Subject: [PATCH v3 06/19] vhost: add iotlb helper functions
Date: Thu,  5 Oct 2017 10:36:14 +0200	[thread overview]
Message-ID: <20171005083627.27828-7-maxime.coquelin@redhat.com> (raw)
In-Reply-To: <20171005083627.27828-1-maxime.coquelin@redhat.com>

Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/Makefile |   4 +-
 lib/librte_vhost/iotlb.c  | 259 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/iotlb.h  |  70 +++++++++++++
 lib/librte_vhost/vhost.c  |   1 +
 lib/librte_vhost/vhost.h  |   6 ++
 5 files changed, 338 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_vhost/iotlb.c
 create mode 100644 lib/librte_vhost/iotlb.h

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 4a116fe31..e1084aba5 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -47,8 +47,8 @@ LDLIBS += -lnuma
 endif
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c socket.c vhost.c vhost_user.c \
-				   virtio_net.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+					vhost_user.c virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
new file mode 100644
index 000000000..fcfdd25d7
--- /dev/null
+++ b/lib/librte_vhost/iotlb.c
@@ -0,0 +1,259 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numaif.h>
+#endif
+
+#include <rte_tailq.h>
+
+#include "iotlb.h"
+#include "vhost.h"
+
+struct vhost_iotlb_entry {
+	TAILQ_ENTRY(vhost_iotlb_entry) next;
+
+	uint64_t iova;
+	uint64_t uaddr;
+	uint64_t size;
+	uint8_t perm;
+};
+
+#define IOTLB_CACHE_SIZE 1024
+
+static void
+vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
+{
+	struct vhost_iotlb_entry *node, *temp_node;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+		TAILQ_REMOVE(&vq->iotlb_list, node, next);
+		rte_mempool_put(vq->iotlb_pool, node);
+	}
+
+	vq->iotlb_cache_nr = 0;
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+static void
+vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
+{
+	struct vhost_iotlb_entry *node, *temp_node;
+	int entry_idx;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	entry_idx = rte_rand() % vq->iotlb_cache_nr;
+
+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+		if (!entry_idx) {
+			TAILQ_REMOVE(&vq->iotlb_list, node, next);
+			rte_mempool_put(vq->iotlb_pool, node);
+			vq->iotlb_cache_nr--;
+			break;
+		}
+		entry_idx--;
+	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void
+vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+				uint64_t uaddr, uint64_t size, uint8_t perm)
+{
+	struct vhost_iotlb_entry *node, *new_node;
+	int ret;
+
+	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+	if (ret) {
+		RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, evict one entry\n");
+		vhost_user_iotlb_cache_random_evict(vq);
+		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
+		if (ret) {
+			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
+			return;
+		}
+	}
+
+	new_node->iova = iova;
+	new_node->uaddr = uaddr;
+	new_node->size = size;
+	new_node->perm = perm;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+		/*
+		 * Entries must be invalidated before being updated.
+		 * So if iova already in list, assume identical.
+		 */
+		if (node->iova == new_node->iova) {
+			rte_mempool_put(vq->iotlb_pool, new_node);
+			goto unlock;
+		} else if (node->iova > new_node->iova) {
+			TAILQ_INSERT_BEFORE(node, new_node, next);
+			vq->iotlb_cache_nr++;
+			goto unlock;
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
+	vq->iotlb_cache_nr++;
+
+unlock:
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void
+vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+					uint64_t iova, uint64_t size)
+{
+	struct vhost_iotlb_entry *node, *temp_node;
+
+	if (unlikely(!size))
+		return;
+
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+
+	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
+		/* Sorted list */
+		if (unlikely(iova + size < node->iova))
+			break;
+
+		if (iova < node->iova + node->size) {
+			TAILQ_REMOVE(&vq->iotlb_list, node, next);
+			rte_mempool_put(vq->iotlb_pool, node);
+			vq->iotlb_cache_nr--;
+		}
+	}
+
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+uint64_t
+vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+						uint64_t *size, uint8_t perm)
+{
+	struct vhost_iotlb_entry *node;
+	uint64_t offset, vva = 0, mapped = 0;
+
+	if (unlikely(!*size))
+		goto out;
+
+	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
+		/* List sorted by iova */
+		if (unlikely(iova < node->iova))
+			break;
+
+		if (iova >= node->iova + node->size)
+			continue;
+
+		if (unlikely((perm & node->perm) != perm)) {
+			vva = 0;
+			break;
+		}
+
+		offset = iova - node->iova;
+		if (!vva)
+			vva = node->uaddr + offset;
+
+		mapped += node->size - offset;
+		iova = node->iova + node->size;
+
+		if (mapped >= *size)
+			break;
+	}
+
+out:
+	/* Only part of the requested chunk is mapped */
+	if (unlikely(mapped < *size))
+		*size = mapped;
+
+	return vva;
+}
+
+int
+vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
+{
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
+	int ret = -1, socket;
+
+	if (vq->iotlb_pool) {
+		/*
+		 * The cache has already been initialized,
+		 * just drop all entries
+		 */
+		vhost_user_iotlb_cache_remove_all(vq);
+		return 0;
+	}
+
+#ifdef RTE_LIBRTE_VHOST_NUMA
+	ret = get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR);
+#endif
+	if (ret)
+		socket = 0;
+
+	rte_rwlock_init(&vq->iotlb_lock);
+
+	TAILQ_INIT(&vq->iotlb_list);
+
+	snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
+			dev->vid, vq_index);
+
+	/* If already created, free it and recreate */
+	vq->iotlb_pool = rte_mempool_lookup(pool_name);
+	if (vq->iotlb_pool)
+		rte_mempool_free(vq->iotlb_pool);
+
+	vq->iotlb_pool = rte_mempool_create(pool_name,
+			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
+			0, 0, NULL, NULL, NULL, socket,
+			MEMPOOL_F_NO_CACHE_ALIGN |
+			MEMPOOL_F_SP_PUT |
+			MEMPOOL_F_SC_GET);
+	if (!vq->iotlb_pool) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+				"Failed to create IOTLB cache pool (%s)\n",
+				pool_name);
+		return -1;
+	}
+
+	vq->iotlb_cache_nr = 0;
+
+	return 0;
+}
+
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
new file mode 100644
index 000000000..27b2d6b30
--- /dev/null
+++ b/lib/librte_vhost/iotlb.h
@@ -0,0 +1,70 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) 2017 Red Hat, Inc.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _VHOST_IOTLB_H_
+#define _VHOST_IOTLB_H_
+
+#include "vhost.h"
+
+static __rte_always_inline void
+vhost_user_iotlb_rd_lock(struct vhost_virtqueue *vq)
+{
+	rte_rwlock_read_lock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_rd_unlock(struct vhost_virtqueue *vq)
+{
+	rte_rwlock_read_unlock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_wr_lock(struct vhost_virtqueue *vq)
+{
+	rte_rwlock_write_lock(&vq->iotlb_lock);
+}
+
+static __rte_always_inline void
+vhost_user_iotlb_wr_unlock(struct vhost_virtqueue *vq)
+{
+	rte_rwlock_write_unlock(&vq->iotlb_lock);
+}
+
+void vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
+					uint64_t uaddr, uint64_t size,
+					uint8_t perm);
+void vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
+					uint64_t iova, uint64_t size);
+uint64_t vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
+					uint64_t *size, uint8_t perm);
+int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
+
+#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 2d30f14c4..edcf1e0c5 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -103,6 +103,7 @@ free_device(struct virtio_net *dev)
 
 		rte_free(vq->shadow_used_ring);
 		rte_free(vq->batch_copy_elems);
+		rte_mempool_free(vq->iotlb_pool);
 		rte_free(vq);
 	}
 
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 94bee4c8d..09a00186f 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -45,6 +45,7 @@
 
 #include <rte_log.h>
 #include <rte_ether.h>
+#include <rte_rwlock.h>
 
 #include "rte_vhost.h"
 
@@ -127,6 +128,11 @@ struct vhost_virtqueue {
 
 	struct batch_copy_elem	*batch_copy_elems;
 	uint16_t		batch_copy_nb_elems;
+
+	rte_rwlock_t	iotlb_lock;
+	struct rte_mempool *iotlb_pool;
+	TAILQ_HEAD(, vhost_iotlb_entry) iotlb_list;
+	int				iotlb_cache_nr;
 } __rte_cache_aligned;
 
 /* Old kernels have no such macros defined */
-- 
2.13.6

  parent reply	other threads:[~2017-10-05  8:40 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-05  8:36 [PATCH v3 00/19] Vhost-user: Implement device IOTLB support Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 01/19] Revert "vhost: workaround MQ fails to startup" Maxime Coquelin
2017-11-01 17:11   ` Kavanagh, Mark B
2017-11-02  9:40     ` Maxime Coquelin
2017-11-03 13:05       ` Yuanhan Liu
2017-11-03 14:28         ` Maxime Coquelin
2017-11-06 12:00           ` Yuanhan Liu
2017-11-06 12:07             ` Maxime Coquelin
2017-11-06 12:24               ` Yuanhan Liu
2017-11-06 12:50                 ` Maxime Coquelin
2017-11-06 13:36                   ` Yuanhan Liu
2017-11-03 15:34       ` Thomas Monjalon
2017-11-03 16:31         ` Kavanagh, Mark B
2017-10-05  8:36 ` [PATCH v3 02/19] vhost: make error handling consistent in rx path Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 03/19] vhost: prepare send_vhost_message() to slave requests Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 04/19] vhost: add support to slave requests channel Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 05/19] vhost: declare missing IOMMU-related definitions for old kernels Maxime Coquelin
2017-10-05  8:36 ` Maxime Coquelin [this message]
2017-10-05  8:36 ` [PATCH v3 07/19] vhost: iotlb: add pending miss request list and helpers Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 08/19] vhost-user: add support to IOTLB miss slave requests Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 09/19] vhost: initialize vrings IOTLB caches Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 10/19] vhost-user: handle IOTLB update and invalidate requests Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 11/19] vhost: introduce guest IOVA to backend VA helper Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 12/19] vhost: use the guest IOVA to host " Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 13/19] vhost: enable rings at the right time Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 14/19] vhost: don't dereference invalid dev pointer after its reallocation Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 15/19] vhost: postpone rings addresses translation Maxime Coquelin
2017-10-13  1:47   ` Yao, Lei A
2017-10-13  7:32     ` Maxime Coquelin
2017-10-13  7:55       ` Yao, Lei A
2017-10-13  7:56         ` Maxime Coquelin
2017-10-16  5:59         ` Yao, Lei A
2017-10-16  6:23           ` Yao, Lei A
2017-10-16  9:47             ` Maxime Coquelin
2017-10-16 10:47               ` Maxime Coquelin
2017-10-17  1:24                 ` Yao, Lei A
2017-10-17  8:06                   ` Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 16/19] vhost-user: translate ring addresses when IOMMU enabled Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 17/19] vhost-user: iommu: postpone device creation until ring are mapped Maxime Coquelin
2017-11-02  7:21   ` Yao, Lei A
2017-11-02  8:21     ` Maxime Coquelin
2017-11-02 16:02       ` Maxime Coquelin
2017-11-03  8:25         ` Maxime Coquelin
2017-11-03 15:15           ` Michael S. Tsirkin
2017-11-03 15:54             ` Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 18/19] vhost: iommu: Invalidate vring in case of matching IOTLB invalidate Maxime Coquelin
2017-10-05  8:36 ` [PATCH v3 19/19] vhost: enable IOMMU support Maxime Coquelin
2017-10-06  6:24 ` [PATCH v3 00/19] Vhost-user: Implement device IOTLB support Yuanhan Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171005083627.27828-7-maxime.coquelin@redhat.com \
    --to=maxime.coquelin@redhat.com \
    --cc=dev@dpdk.org \
    --cc=jasowang@redhat.com \
    --cc=jfreiman@redhat.com \
    --cc=mst@redhat.com \
    --cc=remy.horton@intel.com \
    --cc=tiwei.bie@intel.com \
    --cc=vkaplans@redhat.com \
    --cc=yliu@fridaylinux.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.