From: Vincent Whitchurch <vincent.whitchurch@axis.com>
To: <mst@redhat.com>, <jasowang@redhat.com>
Cc: <kernel@axis.com>, <kvm@vger.kernel.org>,
	<virtualization@lists.linux-foundation.org>,
	<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<pbonzini@redhat.com>, <stefanha@redhat.com>,
	<sgarzare@redhat.com>,
	Vincent Whitchurch <vincent.whitchurch@axis.com>
Subject: [RFC PATCH 04/10] vhost: add support for kernel buffers
Date: Wed, 29 Sep 2021 17:11:13 +0200
Message-ID: <20210929151119.14778-5-vincent.whitchurch@axis.com>
In-Reply-To: <20210929151119.14778-1-vincent.whitchurch@axis.com>

Handle the virtio rings and buffers being placed in kernel memory
instead of userspace memory.  The software IOTLB support is used to
ensure that only permitted regions are accessed.
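
As a rough illustration of the access-control model (not part of this
patch): a kernel-space user of this support could describe each buffer
it is willing to expose via the existing vhost_iotlb API, so that
translate_desc() refuses any access outside the registered ranges.  The
helper name and the identity mapping below are hypothetical; only
vhost_iotlb_add_range() and VHOST_MAP_RW come from the existing IOTLB
code:

#include <linux/types.h>
#include <linux/vhost_iotlb.h>

/*
 * Hypothetical helper: permit vhost accesses to one kernel buffer.
 * The buffer is identity-mapped, i.e. the I/O virtual address used
 * by the virtqueue equals the buffer's kernel virtual address.
 */
static int example_allow_kernel_buffer(struct vhost_iotlb *iotlb,
				       void *buf, size_t len)
{
	u64 start = (u64)(uintptr_t)buf;

	/* Accesses outside [start, start + len - 1] fail translation. */
	return vhost_iotlb_add_range(iotlb, start, start + len - 1,
				     start, VHOST_MAP_RW);
}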

Note that this patch does not provide a way to actually request that
kernel memory be used; an API for that is added in a later patch.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/vhost/Kconfig |   6 ++
 drivers/vhost/vhost.c | 222 +++++++++++++++++++++++++++++++++++++++++-
 drivers/vhost/vhost.h |  34 +++++++
 3 files changed, 257 insertions(+), 5 deletions(-)

diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 587fbae06182..9e76ed485fe1 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -20,6 +20,12 @@ config VHOST
 	  This option is selected by any driver which needs to access
 	  the core of vhost.
 
+config VHOST_KERNEL
+	tristate
+	help
+	  This option is selected by any driver which needs to access the
+	  support for kernel buffers in vhost.
+
 menuconfig VHOST_MENU
 	bool "VHOST drivers"
 	default y
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ce81eee2a3fa..9354061ce75e 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -49,6 +49,9 @@ enum {
 #define vhost_used_event(vq) ((__virtio16 __user *)&vq->user.avail->ring[vq->num])
 #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->user.used->ring[vq->num])
 
+#define vhost_used_event_kern(vq) ((__virtio16 *)&vq->kern.avail->ring[vq->num])
+#define vhost_avail_event_kern(vq) ((__virtio16 *)&vq->kern.used->ring[vq->num])
+
 #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
 static void vhost_disable_cross_endian(struct vhost_virtqueue *vq)
 {
@@ -482,6 +485,7 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->iotlb = NULL;
 	dev->mm = NULL;
 	dev->worker = NULL;
+	dev->kernel = false;
 	dev->iov_limit = iov_limit;
 	dev->weight = weight;
 	dev->byte_weight = byte_weight;
@@ -785,6 +789,18 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
 	return (void __user *)(uintptr_t)(map->addr + addr - map->start);
 }
 
+static inline void *vhost_vq_meta_fetch_kern(struct vhost_virtqueue *vq,
+					       u64 addr, unsigned int size,
+					       int type)
+{
+	const struct vhost_iotlb_map *map = vq->meta_iotlb[type];
+
+	if (!map)
+		return NULL;
+
+	return (void *)(uintptr_t)(map->addr + addr - map->start);
+}
+
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
 static bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem,
@@ -849,6 +865,40 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
 	return ret;
 }
 
+static int vhost_copy_to_kern(struct vhost_virtqueue *vq, void *to,
+			      const void *from, unsigned int size)
+{
+	int ret;
+
+	/* This function should be called after iotlb
+	 * prefetch, which means we're sure that all of the vq
+	 * can be accessed through the iotlb. So -EAGAIN should
+	 * not happen in this case.
+	 */
+	struct iov_iter t;
+	void *kaddr = vhost_vq_meta_fetch_kern(vq,
+			     (u64)(uintptr_t)to, size,
+			     VHOST_ADDR_USED);
+
+	if (kaddr) {
+		memcpy(kaddr, from, size);
+		return 0;
+	}
+
+	ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
+			     ARRAY_SIZE(vq->iotlb_iov),
+			     VHOST_ACCESS_WO);
+	if (ret < 0)
+		goto out;
+	iov_iter_kvec(&t, WRITE, &vq->iotlb_iov->kvec, ret, size);
+	ret = copy_to_iter(from, size, &t);
+	if (ret == size)
+		ret = 0;
+
+out:
+	return ret;
+}
+
 static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 				void __user *from, unsigned size)
 {
@@ -889,6 +939,43 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
 	return ret;
 }
 
+static int vhost_copy_from_kern(struct vhost_virtqueue *vq, void *to,
+				void *from, unsigned int size)
+{
+	int ret;
+
+	/* This function should be called after iotlb
+	 * prefetch, which means we're sure that the vq
+	 * can be accessed through the iotlb. So -EAGAIN should
+	 * not happen in this case.
+	 */
+	void *kaddr = vhost_vq_meta_fetch_kern(vq,
+			     (u64)(uintptr_t)from, size,
+			     VHOST_ADDR_DESC);
+	struct iov_iter f;
+
+	if (kaddr) {
+		memcpy(to, kaddr, size);
+		return 0;
+	}
+
+	ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
+			     ARRAY_SIZE(vq->iotlb_iov),
+			     VHOST_ACCESS_RO);
+	if (ret < 0) {
+		vq_err(vq, "IOTLB translation failure: kaddr %p size 0x%llx\n",
+		       from, (unsigned long long) size);
+		goto out;
+	}
+	iov_iter_kvec(&f, READ, &vq->iotlb_iov->kvec, ret, size);
+	ret = copy_from_iter(to, size, &f);
+	if (ret == size)
+		ret = 0;
+
+out:
+	return ret;
+}
+
 static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
 					  void __user *addr, unsigned int size,
 					  int type)
@@ -915,6 +1002,33 @@ static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
 	return vq->iotlb_iov->iovec.iov_base;
 }
 
+static void *__vhost_get_kern_slow(struct vhost_virtqueue *vq,
+					  void *addr, unsigned int size,
+					  int type)
+{
+	void *out;
+	int ret;
+
+	ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
+			     ARRAY_SIZE(vq->iotlb_iov),
+			     VHOST_ACCESS_RO);
+	if (ret < 0) {
+		vq_err(vq, "IOTLB translation failure: addr %p size 0x%llx\n",
+			addr, (unsigned long long) size);
+		return NULL;
+	}
+
+	if (ret != 1 || vq->iotlb_iov->kvec.iov_len != size) {
+		vq_err(vq, "Non atomic memory access: addr %p size 0x%llx\n",
+			addr, (unsigned long long) size);
+		return NULL;
+	}
+
+	out = vq->iotlb_iov->kvec.iov_base;
+
+	return out;
+}
+
 /* This function should be called after iotlb
  * prefetch, which means we're sure that vq
  * could be access through iotlb. So -EAGAIN should
@@ -932,6 +1046,18 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	return __vhost_get_user_slow(vq, addr, size, type);
 }
 
+static inline void *__vhost_get_kern(struct vhost_virtqueue *vq,
+				     void *addr, unsigned int size,
+				     int type)
+{
+	void *uaddr = vhost_vq_meta_fetch_kern(vq, (u64)(uintptr_t)addr, size, type);
+
+	if (uaddr)
+		return uaddr;
+
+	return __vhost_get_kern_slow(vq, addr, size, type);
+}
+
 #define vhost_put_user(vq, x, ptr)		\
 ({ \
 	int ret; \
@@ -949,8 +1075,25 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
 	ret; \
 })
 
+#define vhost_put_kern(vq, x, ptr)		\
+({ \
+	int ret = 0; \
+	__typeof__(ptr) to = \
+		(__typeof__(ptr)) __vhost_get_kern(vq, ptr,	\
+				  sizeof(*ptr), VHOST_ADDR_USED); \
+	if (to != NULL) \
+		*to = x;	\
+	else \
+		ret = -EFAULT;	\
+	ret; \
+})
+
 static inline int vhost_put_avail_event(struct vhost_virtqueue *vq)
 {
+	if (vhost_kernel(vq))
+		return vhost_put_kern(vq, cpu_to_vhost16(vq, vq->avail_idx),
+				      vhost_avail_event_kern(vq));
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx),
 			      vhost_avail_event(vq));
 }
@@ -959,6 +1102,10 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 				 struct vring_used_elem *head, int idx,
 				 int count)
 {
+	if (vhost_kernel(vq))
+		return vhost_copy_to_kern(vq, vq->kern.used->ring + idx, head,
+					  count * sizeof(*head));
+
 	return vhost_copy_to_user(vq, vq->user.used->ring + idx, head,
 				  count * sizeof(*head));
 }
@@ -966,6 +1113,10 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq,
 static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 
 {
+	if (vhost_kernel(vq))
+		return vhost_put_kern(vq, cpu_to_vhost16(vq, vq->used_flags),
+				      &vq->kern.used->flags);
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags),
 			      &vq->user.used->flags);
 }
@@ -973,6 +1124,10 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq)
 static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 
 {
+	if (vhost_kernel(vq))
+		return vhost_put_kern(vq, cpu_to_vhost16(vq, vq->last_used_idx),
+				      &vq->kern.used->idx);
+
 	return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx),
 			      &vq->user.used->idx);
 }
@@ -995,12 +1150,32 @@ static inline int vhost_put_used_idx(struct vhost_virtqueue *vq)
 	ret; \
 })
 
+#define vhost_get_kern(vq, x, ptr, type)		\
+({ \
+	int ret = 0; \
+	__typeof__(ptr) from = \
+		(__typeof__(ptr)) __vhost_get_kern(vq, ptr, \
+						   sizeof(*ptr), \
+						   type); \
+	if (from != NULL) \
+		x = *from; \
+	else \
+		ret = -EFAULT; \
+	ret; \
+})
+
 #define vhost_get_avail(vq, x, ptr) \
 	vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
 
 #define vhost_get_used(vq, x, ptr) \
 	vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
 
+#define vhost_get_avail_kern(vq, x, ptr) \
+	vhost_get_kern(vq, x, ptr, VHOST_ADDR_AVAIL)
+
+#define vhost_get_used_kern(vq, x, ptr) \
+	vhost_get_kern(vq, x, ptr, VHOST_ADDR_USED)
+
 static void vhost_dev_lock_vqs(struct vhost_dev *d)
 {
 	int i = 0;
@@ -1018,12 +1193,19 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d)
 static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq,
 				      __virtio16 *idx)
 {
+	if (vhost_kernel(vq))
+		return vhost_get_avail_kern(vq, *idx, &vq->kern.avail->idx);
+
 	return vhost_get_avail(vq, *idx, &vq->user.avail->idx);
 }
 
 static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 				       __virtio16 *head, int idx)
 {
+	if (vhost_kernel(vq))
+		return vhost_get_avail_kern(vq, *head,
+				&vq->kern.avail->ring[idx & (vq->num - 1)]);
+
 	return vhost_get_avail(vq, *head,
 			       &vq->user.avail->ring[idx & (vq->num - 1)]);
 }
@@ -1031,24 +1213,36 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq,
 static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq,
 					__virtio16 *flags)
 {
+	if (vhost_kernel(vq))
+		return vhost_get_avail_kern(vq, *flags, &vq->kern.avail->flags);
+
 	return vhost_get_avail(vq, *flags, &vq->user.avail->flags);
 }
 
 static inline int vhost_get_used_event(struct vhost_virtqueue *vq,
 				       __virtio16 *event)
 {
+	if (vhost_kernel(vq))
+		return vhost_get_avail_kern(vq, *event, vhost_used_event_kern(vq));
+
 	return vhost_get_avail(vq, *event, vhost_used_event(vq));
 }
 
 static inline int vhost_get_used_idx(struct vhost_virtqueue *vq,
 				     __virtio16 *idx)
 {
+	if (vhost_kernel(vq))
+		return vhost_get_used_kern(vq, *idx, &vq->kern.used->idx);
+
 	return vhost_get_used(vq, *idx, &vq->user.used->idx);
 }
 
 static inline int vhost_get_desc(struct vhost_virtqueue *vq,
 				 struct vring_desc *desc, int idx)
 {
+	if (vhost_kernel(vq))
+		return vhost_copy_from_kern(vq, desc, vq->kern.desc + idx, sizeof(*desc));
+
 	return vhost_copy_from_user(vq, desc, vq->user.desc + idx, sizeof(*desc));
 }
 
@@ -1909,6 +2103,9 @@ static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
 	struct iovec *iov = &vq->log_iov->iovec;
 	int i, ret;
 
+	if (vhost_kernel(vq))
+		return 0;
+
 	if (!vq->iotlb)
 		return log_write(vq->log_base, vq->log_addr + used_offset, len);
 
@@ -1933,6 +2130,9 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
 	struct iovec *iov = &viov->iovec;
 	int i, r;
 
+	if (vhost_kernel(vq))
+		return 0;
+
 	/* Make sure data written is seen before log. */
 	smp_wmb();
 
@@ -2041,11 +2241,11 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
 	const struct vhost_iotlb_map *map;
 	struct vhost_dev *dev = vq->dev;
 	struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
-	struct iovec *_iov;
 	u64 s = 0;
 	int ret = 0;
 
 	while ((u64)len > s) {
+		u64 mappedaddr;
 		u64 size;
 		if (unlikely(ret >= iov_size)) {
 			ret = -ENOBUFS;
@@ -2065,11 +2265,23 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
 			break;
 		}
 
-		_iov = &iov->iovec + ret;
 		size = map->size - addr + map->start;
-		_iov->iov_len = min((u64)len - s, size);
-		_iov->iov_base = (void __user *)(unsigned long)
-				 (map->addr + addr - map->start);
+		mappedaddr = map->addr + addr - map->start;
+
+		if (vhost_kernel(vq)) {
+			struct kvec *_kvec;
+
+			_kvec = &iov->kvec + ret;
+			_kvec->iov_len = min((u64)len - s, size);
+			_kvec->iov_base = (void *)(unsigned long)mappedaddr;
+		} else {
+			struct iovec *_iov;
+
+			_iov = &iov->iovec + ret;
+			_iov->iov_len = min((u64)len - s, size);
+			_iov->iov_base = (void __user *)(unsigned long)mappedaddr;
+		}
+
 		s += size;
 		addr += size;
 		++ret;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 69aec724ef7f..ded1b39d7852 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -67,6 +67,7 @@ struct vhost_vring_call {
 
 struct vhost_iov {
 	union {
+		struct kvec kvec;
 		struct iovec iovec;
 	};
 };
@@ -83,6 +84,11 @@ struct vhost_virtqueue {
 		vring_avail_t __user *avail;
 		vring_used_t __user *used;
 	} user;
+	struct {
+		vring_desc_t *desc;
+		vring_avail_t *avail;
+		vring_used_t *used;
+	} kern;
 	const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS];
 	struct file *kick;
 	struct vhost_vring_call call_ctx;
@@ -169,18 +175,41 @@ struct vhost_dev {
 	int byte_weight;
 	u64 kcov_handle;
 	bool use_worker;
+	bool kernel;
 	int (*msg_handler)(struct vhost_dev *dev,
 			   struct vhost_iotlb_msg *msg);
 };
 
+static inline bool vhost_kernel(const struct vhost_virtqueue *vq)
+{
+	if (!IS_ENABLED(CONFIG_VHOST_KERNEL))
+		return false;
+
+	return vq->dev->kernel;
+}
+
 static inline size_t vhost_iov_length(const struct vhost_virtqueue *vq, struct vhost_iov *iov,
 				      unsigned long nr_segs)
 {
+	if (vhost_kernel(vq)) {
+		size_t ret = 0;
+		const struct kvec *kvec = &iov->kvec;
+		unsigned int seg;
+
+		for (seg = 0; seg < nr_segs; seg++)
+			ret += kvec[seg].iov_len;
+
+		return ret;
+	}
+
 	return iov_length(&iov->iovec, nr_segs);
 }
 
 static inline size_t vhost_iov_len(const struct vhost_virtqueue *vq, struct vhost_iov *iov)
 {
+	if (vhost_kernel(vq))
+		return iov->kvec.iov_len;
+
 	return iov->iovec.iov_len;
 }
 
@@ -190,6 +219,11 @@ static inline void vhost_iov_iter_init(const struct vhost_virtqueue *vq,
 				       unsigned long nr_segs,
 				       size_t count)
 {
+	if (vhost_kernel(vq)) {
+		iov_iter_kvec(i, direction, &iov->kvec, nr_segs, count);
+		return;
+	}
+
 	iov_iter_init(i, direction, &iov->iovec, nr_segs, count);
 }
 
-- 
2.28.0


Thread overview: 22+ messages
2021-09-29 15:11 [RFC PATCH 00/10] Support kernel buffers in vhost Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 01/10] vhost: scsi: use copy_to_iter() Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 02/10] vhost: push virtqueue area pointers into a user struct Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 03/10] vhost: add iov wrapper Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 04/10] vhost: add support for kernel buffers Vincent Whitchurch [this message]
2021-09-29 15:11 ` [RFC PATCH 05/10] vhost: extract common code for file_operations handling Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 06/10] vhost: extract ioctl locking to common code Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 07/10] vhost: add support for kernel control Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 08/10] vhost: net: add support for kernel control Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 09/10] vdpa: add test driver for kernel buffers in vhost Vincent Whitchurch
2021-09-29 15:11 ` [RFC PATCH 10/10] selftests: add vhost_kernel tests Vincent Whitchurch
