From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:58468) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1c2EJr-00040r-BS for qemu-devel@nongnu.org; Thu, 03 Nov 2016 05:28:25 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1c2EJo-00023m-78 for qemu-devel@nongnu.org; Thu, 03 Nov 2016 05:28:23 -0400 Received: from mx1.redhat.com ([209.132.183.28]:34386) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1c2EJn-00023O-UY for qemu-devel@nongnu.org; Thu, 03 Nov 2016 05:28:20 -0400 From: Jason Wang Date: Thu, 3 Nov 2016 17:27:23 +0800 Message-Id: <1478165243-4767-12-git-send-email-jasowang@redhat.com> In-Reply-To: <1478165243-4767-1-git-send-email-jasowang@redhat.com> References: <1478165243-4767-1-git-send-email-jasowang@redhat.com> Subject: [Qemu-devel] [PATCH V2 11/11] vhost_net: device IOTLB support List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: mst@redhat.com, peterx@redhat.com, wexu@redhat.com, qemu-devel@nongnu.org Cc: vkaplans@redhat.com, pbonzini@redhat.com, cornelia.huck@de.ibm.com, Jason Wang This patches implements Device IOTLB support for vhost kernel. This is done through: 1) switch to use dma helpers when map/unmap vrings from vhost codes 2) kernel support for Device IOTLB API: - allow vhost-net to query the IOMMU IOTLB entry through eventfd - enable the ability for qemu to update a specified mapping of vhost - through ioctl. - enable the ability to invalidate a specified range of iova for the device IOTLB of vhost through ioctl. In x86/intel_iommu case this is triggered through iommu memory region notifier from device IOTLB invalidation descriptor processing routine. With all the above, kernel vhost_net can co-operate with IOMMU. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/virtio/vhost-backend.c | 104 ++++++++++++++++++++++ hw/virtio/vhost.c | 179 +++++++++++++++++++++++++++++++++----- include/hw/virtio/vhost-backend.h | 13 +++ include/hw/virtio/vhost.h | 4 + include/hw/virtio/virtio-access.h | 4 +- net/tap.c | 1 + 6 files changed, 281 insertions(+), 24 deletions(-) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 272a5ec..287829d 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -185,6 +185,107 @@ static int vhost_kernel_vsock_set_running(struct vhost_dev *dev, int start) } #endif /* CONFIG_VHOST_VSOCK */ +static void vhost_kernel_iotlb_read(void *opaque) +{ + struct vhost_dev *dev = opaque; + struct vhost_msg msg; + ssize_t len; + + while ((len = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) { + struct vhost_iotlb_msg *imsg = &msg.iotlb; + if (len < sizeof msg) { + error_report("Wrong vhost message len: %d", (int)len); + break; + } + if (msg.type != VHOST_IOTLB_MSG) { + error_report("Unknown vhost iotlb message type"); + break; + } + switch (imsg->type) { + case VHOST_IOTLB_MISS: + vhost_device_iotlb_miss(dev, imsg->iova, + imsg->perm != VHOST_ACCESS_RO); + break; + case VHOST_IOTLB_UPDATE: + case VHOST_IOTLB_INVALIDATE: + error_report("Unexpected IOTLB message type"); + break; + case VHOST_IOTLB_ACCESS_FAIL: + /* FIXME: report device iotlb error */ + break; + default: + break; + } + } +} + +static int vhost_kernel_update_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm) +{ + struct vhost_msg msg = { + .type = VHOST_IOTLB_MSG, + .iotlb = { + .iova = iova, + .uaddr = uaddr, + .size = len, + .type = VHOST_IOTLB_UPDATE, + } + }; + + switch (perm) { + case IOMMU_RO: + msg.iotlb.perm = VHOST_ACCESS_RO; + break; + case IOMMU_WO: + msg.iotlb.perm = VHOST_ACCESS_WO; + break; + case IOMMU_RW: + msg.iotlb.perm = VHOST_ACCESS_RW; + break; + default: + g_assert_not_reached(); + } + + if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { + error_report("Fail to update device iotlb"); + return -EFAULT; + } + + return 0; +} + +static int vhost_kernel_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len) +{ + struct vhost_msg msg = { + .type = VHOST_IOTLB_MSG, + .iotlb = { + .iova = iova, + .size = len, + .type = VHOST_IOTLB_INVALIDATE, + } + }; + + if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { + error_report("Fail to invalidate device iotlb"); + return -EFAULT; + } + + return 0; +} + +static void vhost_kernel_set_iotlb_callback(struct vhost_dev *dev, + int enabled) +{ + if (enabled) + qemu_set_fd_handler((uintptr_t)dev->opaque, + vhost_kernel_iotlb_read, NULL, dev); + else + qemu_set_fd_handler((uintptr_t)dev->opaque, NULL, NULL, NULL); +} + static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, .vhost_backend_init = vhost_kernel_init, @@ -214,6 +315,9 @@ static const VhostOps kernel_ops = { .vhost_vsock_set_guest_cid = vhost_kernel_vsock_set_guest_cid, .vhost_vsock_set_running = vhost_kernel_vsock_set_running, #endif /* CONFIG_VHOST_VSOCK */ + .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback, + .vhost_update_device_iotlb = vhost_kernel_update_device_iotlb, + .vhost_invalidate_device_iotlb = vhost_kernel_invalidate_device_iotlb, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index bd051ab..1fe2df5 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -421,6 +421,33 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) dev->log_size = size; } +static void *vhost_memory_map(VirtIODevice *vdev, hwaddr addr, + hwaddr *plen, int is_write) +{ + AddressSpace *dma_as = virtio_get_dma_as(vdev); + + if (!memory_region_is_iommu(dma_as->root)) { + return dma_memory_map(dma_as, addr, plen, is_write ? + DMA_DIRECTION_FROM_DEVICE : + DMA_DIRECTION_TO_DEVICE); + } else { + return (void *)(uintptr_t)addr; + } +} + +static void vhost_memory_unmap(VirtIODevice *vdev, void *buffer, + hwaddr len, int is_write, + hwaddr access_len) +{ + AddressSpace *dma_as = virtio_get_dma_as(vdev); + + if (!memory_region_is_iommu(dma_as->root)) { + dma_memory_unmap(dma_as, buffer, len, is_write ? + DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE, + access_len); + } +} + static int vhost_verify_ring_mappings(struct vhost_dev *dev, uint64_t start_addr, uint64_t size) @@ -437,7 +464,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, continue; } l = vq->ring_size; - p = cpu_physical_memory_map(vq->ring_phys, &l, 1); + p = vhost_memory_map(dev->vdev, vq->ring_phys, &l, 1); if (!p || l != vq->ring_size) { error_report("Unable to map ring buffer for ring %d", i); r = -ENOMEM; @@ -446,7 +473,7 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, error_report("Ring buffer relocated for ring %d", i); r = -EBUSY; } - cpu_physical_memory_unmap(p, l, 0, 0); + vhost_memory_unmap(dev->vdev, p, l, 0, 0); } return r; } @@ -674,13 +701,27 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, return 0; } -static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) +static int vhost_dev_has_iommu(struct vhost_dev *dev) +{ + VirtIODevice *vdev = dev->vdev; + AddressSpace *dma_as = virtio_get_dma_as(vdev); + + return memory_region_is_iommu(dma_as->root) && + virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + +static int vhost_dev_set_features(struct vhost_dev *dev, + bool enable_log) { uint64_t features = dev->acked_features; + bool has_iommu = vhost_dev_has_iommu(dev); int r; if (enable_log) { features |= 0x1ULL << VHOST_F_LOG_ALL; } + if (has_iommu) { + features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM; + } r = dev->vhost_ops->vhost_set_features(dev, features); if (r < 0) { VHOST_OPS_DEBUG("vhost_set_features failed"); @@ -817,6 +858,56 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, return -errno; } +static int vhost_memory_region_lookup(struct vhost_dev *hdev, + uint64_t gpa, uint64_t *uaddr, + uint64_t *len) +{ + int i; + + for (i = 0; i < hdev->mem->nregions; i++) { + struct vhost_memory_region *reg = hdev->mem->regions + i; + + if (gpa >= reg->guest_phys_addr && + reg->guest_phys_addr + reg->memory_size > gpa) { + *uaddr = reg->userspace_addr + gpa - reg->guest_phys_addr; + *len = reg->guest_phys_addr + reg->memory_size - gpa; + return 0; + } + } + + return -EFAULT; +} + +void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) +{ + IOMMUTLBEntry iotlb; + uint64_t uaddr, len; + + rcu_read_lock(); + + iotlb = address_space_get_iotlb_entry(virtio_get_dma_as(dev->vdev), + iova, write); + if (iotlb.target_as != NULL) { + if (vhost_memory_region_lookup(dev, iotlb.translated_addr, + &uaddr, &len)) { + error_report("Fail to lookup the translated address " + "%"PRIx64, iotlb.translated_addr); + goto out; + } + + len = MIN(iotlb.addr_mask + 1, len); + iova = iova & ~iotlb.addr_mask; + + if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr, + len, iotlb.perm)) { + error_report("Fail to update device iotlb"); + goto out; + } + } +out: + rcu_read_unlock(); +} + static int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, struct vhost_virtqueue *vq, @@ -862,21 +953,21 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, s = l = virtio_queue_get_desc_size(vdev, idx); a = virtio_queue_get_desc_addr(vdev, idx); - vq->desc = cpu_physical_memory_map(a, &l, 0); + vq->desc = vhost_memory_map(vdev, a, &l, 0); if (!vq->desc || l != s) { r = -ENOMEM; goto fail_alloc_desc; } s = l = virtio_queue_get_avail_size(vdev, idx); a = virtio_queue_get_avail_addr(vdev, idx); - vq->avail = cpu_physical_memory_map(a, &l, 0); + vq->avail = vhost_memory_map(vdev, a, &l, 0); if (!vq->avail || l != s) { r = -ENOMEM; goto fail_alloc_avail; } vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx); vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx); - vq->used = cpu_physical_memory_map(a, &l, 1); + vq->used = vhost_memory_map(vdev, a, &l, 1); if (!vq->used || l != s) { r = -ENOMEM; goto fail_alloc_used; @@ -884,7 +975,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vq->ring_size = s = l = virtio_queue_get_ring_size(vdev, idx); vq->ring_phys = a = virtio_queue_get_ring_addr(vdev, idx); - vq->ring = cpu_physical_memory_map(a, &l, 1); + vq->ring = vhost_memory_map(vdev, a, &l, 1); if (!vq->ring || l != s) { r = -ENOMEM; goto fail_alloc_ring; @@ -930,17 +1021,17 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, fail_vector: fail_kick: fail_alloc: - cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), - 0, 0); + vhost_memory_unmap(vdev, vq->ring, virtio_queue_get_ring_size(vdev, idx), + 0, 0); fail_alloc_ring: - cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), - 0, 0); + vhost_memory_unmap(vdev, vq->used, virtio_queue_get_used_size(vdev, idx), + 0, 0); fail_alloc_used: - cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, 0); + vhost_memory_unmap(vdev, vq->avail, virtio_queue_get_avail_size(vdev, idx), + 0, 0); fail_alloc_avail: - cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx), - 0, 0); + vhost_memory_unmap(vdev, vq->desc, virtio_queue_get_desc_size(vdev, idx), + 0, 0); fail_alloc_desc: return r; } @@ -973,14 +1064,14 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, vhost_vq_index); } - cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx), - 0, virtio_queue_get_ring_size(vdev, idx)); - cpu_physical_memory_unmap(vq->used, virtio_queue_get_used_size(vdev, idx), - 1, virtio_queue_get_used_size(vdev, idx)); - cpu_physical_memory_unmap(vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, virtio_queue_get_avail_size(vdev, idx)); - cpu_physical_memory_unmap(vq->desc, virtio_queue_get_desc_size(vdev, idx), - 0, virtio_queue_get_desc_size(vdev, idx)); + vhost_memory_unmap(vdev, vq->ring, virtio_queue_get_ring_size(vdev, idx), + 0, virtio_queue_get_ring_size(vdev, idx)); + vhost_memory_unmap(vdev, vq->used, virtio_queue_get_used_size(vdev, idx), + 1, virtio_queue_get_used_size(vdev, idx)); + vhost_memory_unmap(vdev, vq->avail, virtio_queue_get_avail_size(vdev, idx), + 0, virtio_queue_get_avail_size(vdev, idx)); + vhost_memory_unmap(vdev, vq->desc, virtio_queue_get_desc_size(vdev, idx), + 0, virtio_queue_get_desc_size(vdev, idx)); } static void vhost_eventfd_add(MemoryListener *listener, @@ -1037,6 +1128,9 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, r = -errno; goto fail_call; } + + vq->dev = dev; + return 0; fail_call: event_notifier_cleanup(&vq->masked_notifier); @@ -1048,12 +1142,24 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) event_notifier_cleanup(&vq->masked_notifier); } +static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + struct vhost_dev *hdev = container_of(n, struct vhost_dev, n); + + if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, + iotlb->iova, + iotlb->addr_mask + 1)) { + error_report("Fail to invalidate device iotlb"); + } +} + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, VhostBackendType backend_type, uint32_t busyloop_timeout) { uint64_t features; int i, r, n_initialized_vqs = 0; + hdev->vdev = NULL; hdev->migration_blocker = NULL; r = vhost_set_backend_type(hdev, backend_type); @@ -1118,6 +1224,9 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .priority = 10 }; + hdev->n.notify = vhost_iommu_unmap_notify; + hdev->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; + if (hdev->migration_blocker == NULL) { if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { error_setg(&hdev->migration_blocker, @@ -1310,11 +1419,18 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) assert(hdev->vhost_ops); hdev->started = true; + hdev->vdev = vdev; r = vhost_dev_set_features(hdev, hdev->log_enabled); if (r < 0) { goto fail_features; } + + if (vhost_dev_has_iommu(hdev)) { + memory_region_register_iommu_notifier(virtio_get_dma_as(vdev)->root, + &hdev->n); + } + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { VHOST_OPS_DEBUG("vhost_set_mem_table failed"); @@ -1348,6 +1464,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) } } + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true); + + if (vhost_dev_has_iommu(hdev)) { + /* Update used ring information for IOTLB to work correctly, + * vhost-kernel code requires for this.*/ + for (i = 0; i < hdev->nvqs; ++i) { + struct vhost_virtqueue *vq = hdev->vqs + i; + vhost_device_iotlb_miss(hdev, vq->used_phys, true); + } + } return 0; fail_log: vhost_log_put(hdev, false); @@ -1359,6 +1485,7 @@ fail_vq: hdev->vq_index + i); } i = hdev->nvqs; + fail_mem: fail_features: @@ -1373,6 +1500,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) /* should only be called after backend is connected */ assert(hdev->vhost_ops); + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); for (i = 0; i < hdev->nvqs; ++i) { vhost_virtqueue_stop(hdev, @@ -1381,8 +1509,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->vq_index + i); } + if (vhost_dev_has_iommu(hdev)) { + memory_region_unregister_iommu_notifier(virtio_get_dma_as(vdev)->root, + &hdev->n); + } vhost_log_put(hdev, true); hdev->started = false; + hdev->vdev = NULL; } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index 6e90703..236eb85 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -11,6 +11,8 @@ #ifndef VHOST_BACKEND_H #define VHOST_BACKEND_H +#include "exec/memory.h" + typedef enum VhostBackendType { VHOST_BACKEND_TYPE_NONE = 0, VHOST_BACKEND_TYPE_KERNEL = 1, @@ -76,6 +78,14 @@ typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev, typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, uint64_t guest_cid); typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); +typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, + int enabled); +typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm); +typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev, + uint64_t iova, uint64_t len); typedef struct VhostOps { VhostBackendType backend_type; @@ -107,6 +117,9 @@ typedef struct VhostOps { vhost_backend_can_merge_op vhost_backend_can_merge; vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; vhost_vsock_set_running_op vhost_vsock_set_running; + vhost_set_iotlb_callback_op vhost_set_iotlb_callback; + vhost_update_device_iotlb_op vhost_update_device_iotlb; + vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index e433089..4e52a13 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -20,6 +20,7 @@ struct vhost_virtqueue { unsigned long long ring_phys; unsigned ring_size; EventNotifier masked_notifier; + struct vhost_dev *dev; }; typedef unsigned long vhost_log_chunk_t; @@ -37,6 +38,7 @@ struct vhost_log { struct vhost_memory; struct vhost_dev { + VirtIODevice *vdev; MemoryListener memory_listener; struct vhost_memory *mem; int n_mem_sections; @@ -61,6 +63,7 @@ struct vhost_dev { void *opaque; struct vhost_log *log; QLIST_ENTRY(vhost_dev) entry; + IOMMUNotifier n; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, @@ -90,4 +93,5 @@ bool vhost_has_free_slot(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); +void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); #endif diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h index 4643a06..c122fd8 100644 --- a/include/hw/virtio/virtio-access.h +++ b/include/hw/virtio/virtio-access.h @@ -18,6 +18,7 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" +#include "sysemu/dma.h" #include "exec/address-spaces.h" #if defined(TARGET_PPC64) || defined(TARGET_ARM) @@ -193,4 +194,5 @@ static inline void virtio_tswap64s(VirtIODevice *vdev, uint64_t *s) { *s = virtio_tswap64(vdev, *s); } -#endif /* QEMU_VIRTIO_ACCESS_H */ + +#endif /* _QEMU_VIRTIO_ACCESS_H */ diff --git a/net/tap.c b/net/tap.c index b6896a7..86071b2 100644 --- a/net/tap.c +++ b/net/tap.c @@ -696,6 +696,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, "tap: open vhost char device failed"); return; } + fcntl(vhostfd, F_SETFL, O_NONBLOCK); } options.opaque = (void *)(uintptr_t)vhostfd; -- 2.7.4