All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  6:38 Andy Lutomirski
  2015-10-28  6:38 ` [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack Andy Lutomirski
                   ` (9 more replies)
  0 siblings, 10 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, Michael S. Tsirkin,
	virtualization, Andy Lutomirski

This switches virtio to use the DMA API unconditionally.  I'm sure
it breaks things, but it seems to work on x86 using virtio-pci, with
and without Xen, and using both the modern 1.0 variant and the
legacy variant.

Changes from v2:
 - Fix really embarrassing bug.  This version actually works.

Changes from v1:
 - Fix an endian conversion error causing a BUG to hit.
 - Fix a DMA ordering issue (swiotlb=force works now).
 - Minor cleanups.

Andy Lutomirski (3):
  virtio_net: Stop doing DMA from the stack
  virtio_ring: Support DMA APIs
  virtio_pci: Use the DMA API

 drivers/net/virtio_net.c           |  53 +++++++----
 drivers/virtio/Kconfig             |   2 +-
 drivers/virtio/virtio_pci_common.h |   3 +-
 drivers/virtio/virtio_pci_legacy.c |  19 +++-
 drivers/virtio/virtio_pci_modern.c |  34 +++++--
 drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
 tools/virtio/linux/dma-mapping.h   |  17 ++++
 7 files changed, 246 insertions(+), 69 deletions(-)
 create mode 100644 tools/virtio/linux/dma-mapping.h

-- 
2.4.3


^ permalink raw reply	[flat|nested] 110+ messages in thread

* [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
@ 2015-10-28  6:38 ` Andy Lutomirski
  2015-10-28  7:08   ` Michael S. Tsirkin
  2015-10-28  7:08   ` Michael S. Tsirkin
  2015-10-28  6:38 ` Andy Lutomirski
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, Michael S. Tsirkin,
	virtualization, Andy Lutomirski, Andy Lutomirski

From: Andy Lutomirski <luto@amacapital.net>

Once virtio starts using the DMA API, we won't be able to safely DMA
from the stack.  virtio-net does a couple of config DMA requests
from small stack buffers -- switch to using dynamically-allocated
memory.

This should have no effect on any performance-critical code paths.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Hi Michael and DaveM-

This is a prerequisite for the virtio DMA fixing project.  It works
as a standalone patch, though.  Would it make sense to apply it to
an appropriate networking tree now?

(This is unchanged from v2.)

 drivers/net/virtio_net.c | 53 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d8838dedb7a4..4f10f8a58811 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -976,31 +976,43 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 				 struct scatterlist *out)
 {
 	struct scatterlist *sgs[4], hdr, stat;
-	struct virtio_net_ctrl_hdr ctrl;
-	virtio_net_ctrl_ack status = ~0;
+
+	struct {
+		struct virtio_net_ctrl_hdr ctrl;
+		virtio_net_ctrl_ack status;
+	} *buf;
+
 	unsigned out_num = 0, tmp;
+	bool ret;
 
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
-	ctrl.class = class;
-	ctrl.cmd = cmd;
+	buf = kmalloc(sizeof(*buf), GFP_ATOMIC);
+	if (!buf)
+		return false;
+	buf->status = ~0;
+
+	buf->ctrl.class = class;
+	buf->ctrl.cmd = cmd;
 	/* Add header */
-	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
+	sg_init_one(&hdr, &buf->ctrl, sizeof(buf->ctrl));
 	sgs[out_num++] = &hdr;
 
 	if (out)
 		sgs[out_num++] = out;
 
 	/* Add return status. */
-	sg_init_one(&stat, &status, sizeof(status));
+	sg_init_one(&stat, &buf->status, sizeof(buf->status));
 	sgs[out_num] = &stat;
 
 	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
 	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
 
-	if (unlikely(!virtqueue_kick(vi->cvq)))
-		return status == VIRTIO_NET_OK;
+	if (unlikely(!virtqueue_kick(vi->cvq))) {
+		ret = (buf->status == VIRTIO_NET_OK);
+		goto out;
+	}
 
 	/* Spin for a response, the kick causes an ioport write, trapping
 	 * into the hypervisor, so the request should be handled immediately.
@@ -1009,7 +1021,11 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	       !virtqueue_is_broken(vi->cvq))
 		cpu_relax();
 
-	return status == VIRTIO_NET_OK;
+	ret = (buf->status == VIRTIO_NET_OK);
+
+out:
+	kfree(buf);
+	return ret;
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -1151,7 +1167,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg[2];
-	u8 promisc, allmulti;
+	u8 *cmdbyte;
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
 	int uc_count;
@@ -1163,22 +1179,25 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
-	promisc = ((dev->flags & IFF_PROMISC) != 0);
-	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+	cmdbyte = kmalloc(sizeof(*cmdbyte), GFP_ATOMIC);
+	if (!cmdbyte)
+		return;
 
-	sg_init_one(sg, &promisc, sizeof(promisc));
+	sg_init_one(sg, cmdbyte, sizeof(*cmdbyte));
 
+	*cmdbyte = ((dev->flags & IFF_PROMISC) != 0);
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
-			 promisc ? "en" : "dis");
-
-	sg_init_one(sg, &allmulti, sizeof(allmulti));
+			 *cmdbyte ? "en" : "dis");
 
+	*cmdbyte = ((dev->flags & IFF_ALLMULTI) != 0);
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
-			 allmulti ? "en" : "dis");
+			 *cmdbyte ? "en" : "dis");
+
+	kfree(cmdbyte);
 
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
  2015-10-28  6:38 ` [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack Andy Lutomirski
@ 2015-10-28  6:38 ` Andy Lutomirski
  2015-10-28  6:38 ` [PATCH v3 2/3] virtio_ring: Support DMA APIs Andy Lutomirski
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-s390, Joerg Roedel, KVM, Michael S. Tsirkin, benh,
	Sebastian Ott, virtualization, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Andy Lutomirski, dwmw2,
	Christoph Hellwig, Martin Schwidefsky

From: Andy Lutomirski <luto@amacapital.net>

Once virtio starts using the DMA API, we won't be able to safely DMA
from the stack.  virtio-net does a couple of config DMA requests
from small stack buffers -- switch to using dynamically-allocated
memory.

This should have no effect on any performance-critical code paths.

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---

Hi Michael and DaveM-

This is a prerequisite for the virtio DMA fixing project.  It works
as a standalone patch, though.  Would it make sense to apply it to
an appropriate networking tree now?

(This is unchanged from v2.)

 drivers/net/virtio_net.c | 53 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 17 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d8838dedb7a4..4f10f8a58811 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -976,31 +976,43 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 				 struct scatterlist *out)
 {
 	struct scatterlist *sgs[4], hdr, stat;
-	struct virtio_net_ctrl_hdr ctrl;
-	virtio_net_ctrl_ack status = ~0;
+
+	struct {
+		struct virtio_net_ctrl_hdr ctrl;
+		virtio_net_ctrl_ack status;
+	} *buf;
+
 	unsigned out_num = 0, tmp;
+	bool ret;
 
 	/* Caller should know better */
 	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
 
-	ctrl.class = class;
-	ctrl.cmd = cmd;
+	buf = kmalloc(sizeof(*buf), GFP_ATOMIC);
+	if (!buf)
+		return false;
+	buf->status = ~0;
+
+	buf->ctrl.class = class;
+	buf->ctrl.cmd = cmd;
 	/* Add header */
-	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
+	sg_init_one(&hdr, &buf->ctrl, sizeof(buf->ctrl));
 	sgs[out_num++] = &hdr;
 
 	if (out)
 		sgs[out_num++] = out;
 
 	/* Add return status. */
-	sg_init_one(&stat, &status, sizeof(status));
+	sg_init_one(&stat, &buf->status, sizeof(buf->status));
 	sgs[out_num] = &stat;
 
 	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
 	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
 
-	if (unlikely(!virtqueue_kick(vi->cvq)))
-		return status == VIRTIO_NET_OK;
+	if (unlikely(!virtqueue_kick(vi->cvq))) {
+		ret = (buf->status == VIRTIO_NET_OK);
+		goto out;
+	}
 
 	/* Spin for a response, the kick causes an ioport write, trapping
 	 * into the hypervisor, so the request should be handled immediately.
@@ -1009,7 +1021,11 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
 	       !virtqueue_is_broken(vi->cvq))
 		cpu_relax();
 
-	return status == VIRTIO_NET_OK;
+	ret = (buf->status == VIRTIO_NET_OK);
+
+out:
+	kfree(buf);
+	return ret;
 }
 
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
@@ -1151,7 +1167,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg[2];
-	u8 promisc, allmulti;
+	u8 *cmdbyte;
 	struct virtio_net_ctrl_mac *mac_data;
 	struct netdev_hw_addr *ha;
 	int uc_count;
@@ -1163,22 +1179,25 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
 		return;
 
-	promisc = ((dev->flags & IFF_PROMISC) != 0);
-	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
+	cmdbyte = kmalloc(sizeof(*cmdbyte), GFP_ATOMIC);
+	if (!cmdbyte)
+		return;
 
-	sg_init_one(sg, &promisc, sizeof(promisc));
+	sg_init_one(sg, cmdbyte, sizeof(*cmdbyte));
 
+	*cmdbyte = ((dev->flags & IFF_PROMISC) != 0);
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
 		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
-			 promisc ? "en" : "dis");
-
-	sg_init_one(sg, &allmulti, sizeof(allmulti));
+			 *cmdbyte ? "en" : "dis");
 
+	*cmdbyte = ((dev->flags & IFF_ALLMULTI) != 0);
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
 				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
 		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
-			 allmulti ? "en" : "dis");
+			 *cmdbyte ? "en" : "dis");
+
+	kfree(cmdbyte);
 
 	uc_count = netdev_uc_count(dev);
 	mc_count = netdev_mc_count(dev);
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 2/3] virtio_ring: Support DMA APIs
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
                   ` (2 preceding siblings ...)
  2015-10-28  6:38 ` [PATCH v3 2/3] virtio_ring: Support DMA APIs Andy Lutomirski
@ 2015-10-28  6:38 ` Andy Lutomirski
  2015-10-28  6:39 ` [PATCH v3 3/3] virtio_pci: Use the DMA API Andy Lutomirski
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, Michael S. Tsirkin,
	virtualization, Andy Lutomirski

virtio_ring currently sends the device (usually a hypervisor)
physical addresses of its I/O buffers.  This is okay when DMA
addresses and physical addresses are the same thing, but this isn't
always the case.  For example, this never works on Xen guests, and
it is likely to fail if a physical "virtio" device ever ends up
behind an IOMMU or swiotlb.

The immediate use case for me is to enable virtio on Xen guests.
For that to work, we need vring to support DMA address translation
as well as a corresponding change to virtio_pci or to another
driver.

With this patch, if enabled, virtfs survives kmemleak and
CONFIG_DMA_API_DEBUG.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 drivers/virtio/Kconfig           |   2 +-
 drivers/virtio/virtio_ring.c     | 187 +++++++++++++++++++++++++++++++--------
 tools/virtio/linux/dma-mapping.h |  17 ++++
 3 files changed, 169 insertions(+), 37 deletions(-)
 create mode 100644 tools/virtio/linux/dma-mapping.h

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index cab9f3f63a38..77590320d44c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -60,7 +60,7 @@ config VIRTIO_INPUT
 
  config VIRTIO_MMIO
 	tristate "Platform bus driver for memory mapped virtio devices"
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && HAS_DMA
  	select VIRTIO
  	---help---
  	 This drivers provides support for memory mapped virtio
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 096b857e7b75..6962ea37ade0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 #include <linux/kmemleak.h>
+#include <linux/dma-mapping.h>
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
@@ -54,7 +55,14 @@
 #define END_USE(vq)
 #endif
 
-struct vring_virtqueue {
+struct vring_desc_state
+{
+	void *data;			/* Data for callback. */
+	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+};
+
+struct vring_virtqueue
+{
 	struct virtqueue vq;
 
 	/* Actual memory layout for this queue */
@@ -92,12 +100,71 @@ struct vring_virtqueue {
 	ktime_t last_add_time;
 #endif
 
-	/* Tokens for callbacks. */
-	void *data[];
+	/* Per-descriptor state. */
+	struct vring_desc_state desc_state[];
 };
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/*
+ * The DMA ops on various arches are rather gnarly right now, and
+ * making all of the arch DMA ops work on the vring device itself
+ * is a mess.  For now, we use the parent device for DMA ops.
+ */
+struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+{
+	return vq->vq.vdev->dev.parent;
+}
+
+/* Map one sg entry. */
+static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
+				   struct scatterlist *sg,
+				   enum dma_data_direction direction)
+{
+	/*
+	 * We can't use dma_map_sg, because we don't use scatterlists in
+	 * the way it expects (we don't guarantee that the scatterlist
+	 * will exist for the lifetime of the mapping).
+	 */
+	return dma_map_page(vring_dma_dev(vq),
+			    sg_page(sg), sg->offset, sg->length,
+			    direction);
+}
+
+static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
+				   void *cpu_addr, size_t size,
+				   enum dma_data_direction direction)
+{
+	return dma_map_single(vring_dma_dev(vq),
+			      cpu_addr, size, direction);
+}
+
+static void vring_unmap_one(const struct vring_virtqueue *vq,
+			    struct vring_desc *desc)
+{
+	u16 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
+				 virtio32_to_cpu(vq->vq.vdev, desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
+			       virtio32_to_cpu(vq->vq.vdev, desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
 static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 					 unsigned int total_sg, gfp_t gfp)
 {
@@ -131,7 +198,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, uninitialized_var(prev);
+	unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 	int head;
 	bool indirect;
 
@@ -171,21 +238,15 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	if (desc) {
 		/* Use a single buffer which doesn't continue */
-		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
-		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
-		/* avoid kmemleak false positive (hidden by virt_to_phys) */
-		kmemleak_ignore(desc);
-		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
-
+		indirect = true;
 		/* Set up rest to use this indirect table. */
 		i = 0;
 		descs_used = 1;
-		indirect = true;
 	} else {
+		indirect = false;
 		desc = vq->vring.desc;
 		i = head;
 		descs_used = total_sg;
-		indirect = false;
 	}
 
 	if (vq->vq.num_free < descs_used) {
@@ -200,14 +261,13 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
-	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= descs_used;
-
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, vring_map_one_sg(vq, sg, DMA_TO_DEVICE));
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
+			if (vring_mapping_error(vq, desc[i].addr))
+				goto unmap_release;
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 		}
@@ -215,8 +275,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, vring_map_one_sg(vq, sg, DMA_FROM_DEVICE));
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
+			if (vring_mapping_error(vq, desc[i].addr))
+				goto unmap_release;
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 		}
@@ -224,14 +286,34 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* Last one doesn't continue. */
 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 
+	if (indirect) {
+		/* Now that the indirect table is filled in, map it. */
+		dma_addr_t addr = vring_map_single(
+			vq, desc, total_sg * sizeof(struct vring_desc),
+			DMA_TO_DEVICE);
+
+		if (vring_mapping_error(vq, addr))
+			goto unmap_release;
+
+		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
+		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+
+		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
+	}
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
 	/* Update free pointer */
 	if (indirect)
 		vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
 	else
 		vq->free_head = i;
 
-	/* Set token. */
-	vq->data[head] = data;
+	/* Store token and indirect buffer state. */
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = desc;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -253,6 +335,24 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		virtqueue_kick(_vq);
 
 	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_one(vq, &desc[i]);
+		i = vq->vring.desc[i].next;
+	}
+
+	vq->vq.num_free += total_sg;
+
+	if (indirect)
+		kfree(desc);
+
+	return -EIO;
 }
 
 /**
@@ -423,27 +523,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
 
 static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 {
-	unsigned int i;
+	unsigned int i, j;
+	u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 
 	/* Clear data ptr. */
-	vq->data[head] = NULL;
+	vq->desc_state[head].data = NULL;
 
-	/* Put back on free list: find end */
+	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
-	/* Free the indirect table */
-	if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))
-		kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
-
-	while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
+	while (vq->vring.desc[i].flags & nextflag) {
+		vring_unmap_one(vq, &vq->vring.desc[i]);
 		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
 		vq->vq.num_free++;
 	}
 
+	vring_unmap_one(vq, &vq->vring.desc[i]);
 	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
 	vq->free_head = head;
+
 	/* Plus final descriptor */
 	vq->vq.num_free++;
+
+	/* Free the indirect table, if any, now that it's unmapped. */
+	if (vq->desc_state[head].indir_desc) {
+		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+		u32 len = vq->vring.desc[head].len;
+
+		BUG_ON(!(vq->vring.desc[head].flags &
+			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+		for (j = 0; j < len / sizeof(struct vring_desc); j++)
+			vring_unmap_one(vq, &indir_desc[j]);
+
+		kfree(vq->desc_state[head].indir_desc);
+		vq->desc_state[head].indir_desc = NULL;
+	}
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
@@ -498,13 +614,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 		BAD_RING(vq, "id %u out of range\n", i);
 		return NULL;
 	}
-	if (unlikely(!vq->data[i])) {
+	if (unlikely(!vq->desc_state[i].data)) {
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
 
 	/* detach_buf clears data, so grab it now. */
-	ret = vq->data[i];
+	ret = vq->desc_state[i].data;
 	detach_buf(vq, i);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
@@ -665,10 +781,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 	START_USE(vq);
 
 	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->data[i])
+		if (!vq->desc_state[i].data)
 			continue;
 		/* detach_buf clears data, so grab it now. */
-		buf = vq->data[i];
+		buf = vq->desc_state[i].data;
 		detach_buf(vq, i);
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
 		END_USE(vq);
@@ -721,7 +837,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 		return NULL;
 	}
 
-	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+	vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
+		     GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
@@ -751,11 +868,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-	for (i = 0; i < num-1; i++) {
+	for (i = 0; i < num-1; i++)
 		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-		vq->data[i] = NULL;
-	}
-	vq->data[i] = NULL;
+	memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
 
 	return &vq->vq;
 }
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
new file mode 100644
index 000000000000..4f93af89ae16
--- /dev/null
+++ b/tools/virtio/linux/dma-mapping.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_DMA_MAPPING_H
+#define _LINUX_DMA_MAPPING_H
+
+#ifdef CONFIG_HAS_DMA
+# error Virtio userspace code does not support CONFIG_HAS_DMA
+#endif
+
+#define PCI_DMA_BUS_IS_PHYS 1
+
+enum dma_data_direction {
+	DMA_BIDIRECTIONAL = 0,
+	DMA_TO_DEVICE = 1,
+	DMA_FROM_DEVICE = 2,
+	DMA_NONE = 3,
+};
+
+#endif
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 2/3] virtio_ring: Support DMA APIs
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
  2015-10-28  6:38 ` [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack Andy Lutomirski
  2015-10-28  6:38 ` Andy Lutomirski
@ 2015-10-28  6:38 ` Andy Lutomirski
  2015-10-28  6:38 ` Andy Lutomirski
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-s390, Joerg Roedel, KVM, Michael S. Tsirkin, benh,
	Sebastian Ott, virtualization, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, dwmw2, Christoph Hellwig,
	Martin Schwidefsky

virtio_ring currently sends the device (usually a hypervisor)
physical addresses of its I/O buffers.  This is okay when DMA
addresses and physical addresses are the same thing, but this isn't
always the case.  For example, this never works on Xen guests, and
it is likely to fail if a physical "virtio" device ever ends up
behind an IOMMU or swiotlb.

The immediate use case for me is to enable virtio on Xen guests.
For that to work, we need vring to support DMA address translation
as well as a corresponding change to virtio_pci or to another
driver.

With this patch, if enabled, virtfs survives kmemleak and
CONFIG_DMA_API_DEBUG.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 drivers/virtio/Kconfig           |   2 +-
 drivers/virtio/virtio_ring.c     | 187 +++++++++++++++++++++++++++++++--------
 tools/virtio/linux/dma-mapping.h |  17 ++++
 3 files changed, 169 insertions(+), 37 deletions(-)
 create mode 100644 tools/virtio/linux/dma-mapping.h

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index cab9f3f63a38..77590320d44c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -60,7 +60,7 @@ config VIRTIO_INPUT
 
  config VIRTIO_MMIO
 	tristate "Platform bus driver for memory mapped virtio devices"
-	depends on HAS_IOMEM
+	depends on HAS_IOMEM && HAS_DMA
  	select VIRTIO
  	---help---
  	 This drivers provides support for memory mapped virtio
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 096b857e7b75..6962ea37ade0 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 #include <linux/kmemleak.h>
+#include <linux/dma-mapping.h>
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
@@ -54,7 +55,14 @@
 #define END_USE(vq)
 #endif
 
-struct vring_virtqueue {
+struct vring_desc_state
+{
+	void *data;			/* Data for callback. */
+	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+};
+
+struct vring_virtqueue
+{
 	struct virtqueue vq;
 
 	/* Actual memory layout for this queue */
@@ -92,12 +100,71 @@ struct vring_virtqueue {
 	ktime_t last_add_time;
 #endif
 
-	/* Tokens for callbacks. */
-	void *data[];
+	/* Per-descriptor state. */
+	struct vring_desc_state desc_state[];
 };
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
+/*
+ * The DMA ops on various arches are rather gnarly right now, and
+ * making all of the arch DMA ops work on the vring device itself
+ * is a mess.  For now, we use the parent device for DMA ops.
+ */
+struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+{
+	return vq->vq.vdev->dev.parent;
+}
+
+/* Map one sg entry. */
+static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
+				   struct scatterlist *sg,
+				   enum dma_data_direction direction)
+{
+	/*
+	 * We can't use dma_map_sg, because we don't use scatterlists in
+	 * the way it expects (we don't guarantee that the scatterlist
+	 * will exist for the lifetime of the mapping).
+	 */
+	return dma_map_page(vring_dma_dev(vq),
+			    sg_page(sg), sg->offset, sg->length,
+			    direction);
+}
+
+static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
+				   void *cpu_addr, size_t size,
+				   enum dma_data_direction direction)
+{
+	return dma_map_single(vring_dma_dev(vq),
+			      cpu_addr, size, direction);
+}
+
+static void vring_unmap_one(const struct vring_virtqueue *vq,
+			    struct vring_desc *desc)
+{
+	u16 flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
+				 virtio32_to_cpu(vq->vq.vdev, desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
+			       virtio32_to_cpu(vq->vq.vdev, desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
 static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
 					 unsigned int total_sg, gfp_t gfp)
 {
@@ -131,7 +198,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, uninitialized_var(prev);
+	unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 	int head;
 	bool indirect;
 
@@ -171,21 +238,15 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	if (desc) {
 		/* Use a single buffer which doesn't continue */
-		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
-		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
-		/* avoid kmemleak false positive (hidden by virt_to_phys) */
-		kmemleak_ignore(desc);
-		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
-
+		indirect = true;
 		/* Set up rest to use this indirect table. */
 		i = 0;
 		descs_used = 1;
-		indirect = true;
 	} else {
+		indirect = false;
 		desc = vq->vring.desc;
 		i = head;
 		descs_used = total_sg;
-		indirect = false;
 	}
 
 	if (vq->vq.num_free < descs_used) {
@@ -200,14 +261,13 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
-	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= descs_used;
-
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, vring_map_one_sg(vq, sg, DMA_TO_DEVICE));
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
+			if (vring_mapping_error(vq, desc[i].addr))
+				goto unmap_release;
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 		}
@@ -215,8 +275,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, vring_map_one_sg(vq, sg, DMA_FROM_DEVICE));
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
+			if (vring_mapping_error(vq, desc[i].addr))
+				goto unmap_release;
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 		}
@@ -224,14 +286,34 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* Last one doesn't continue. */
 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 
+	if (indirect) {
+		/* Now that the indirect table is filled in, map it. */
+		dma_addr_t addr = vring_map_single(
+			vq, desc, total_sg * sizeof(struct vring_desc),
+			DMA_TO_DEVICE);
+
+		if (vring_mapping_error(vq, addr))
+			goto unmap_release;
+
+		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
+		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+
+		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
+	}
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
 	/* Update free pointer */
 	if (indirect)
 		vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
 	else
 		vq->free_head = i;
 
-	/* Set token. */
-	vq->data[head] = data;
+	/* Store token and indirect buffer state. */
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = desc;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -253,6 +335,24 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		virtqueue_kick(_vq);
 
 	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_one(vq, &desc[i]);
+		i = vq->vring.desc[i].next;
+	}
+
+	vq->vq.num_free += total_sg;
+
+	if (indirect)
+		kfree(desc);
+
+	return -EIO;
 }
 
 /**
@@ -423,27 +523,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
 
 static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 {
-	unsigned int i;
+	unsigned int i, j;
+	u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 
 	/* Clear data ptr. */
-	vq->data[head] = NULL;
+	vq->desc_state[head].data = NULL;
 
-	/* Put back on free list: find end */
+	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
-	/* Free the indirect table */
-	if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))
-		kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
-
-	while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
+	while (vq->vring.desc[i].flags & nextflag) {
+		vring_unmap_one(vq, &vq->vring.desc[i]);
 		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
 		vq->vq.num_free++;
 	}
 
+	vring_unmap_one(vq, &vq->vring.desc[i]);
 	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
 	vq->free_head = head;
+
 	/* Plus final descriptor */
 	vq->vq.num_free++;
+
+	/* Free the indirect table, if any, now that it's unmapped. */
+	if (vq->desc_state[head].indir_desc) {
+		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+		u32 len = vq->vring.desc[head].len;
+
+		BUG_ON(!(vq->vring.desc[head].flags &
+			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+		for (j = 0; j < len / sizeof(struct vring_desc); j++)
+			vring_unmap_one(vq, &indir_desc[j]);
+
+		kfree(vq->desc_state[head].indir_desc);
+		vq->desc_state[head].indir_desc = NULL;
+	}
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
@@ -498,13 +614,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 		BAD_RING(vq, "id %u out of range\n", i);
 		return NULL;
 	}
-	if (unlikely(!vq->data[i])) {
+	if (unlikely(!vq->desc_state[i].data)) {
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
 
 	/* detach_buf clears data, so grab it now. */
-	ret = vq->data[i];
+	ret = vq->desc_state[i].data;
 	detach_buf(vq, i);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
@@ -665,10 +781,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 	START_USE(vq);
 
 	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->data[i])
+		if (!vq->desc_state[i].data)
 			continue;
 		/* detach_buf clears data, so grab it now. */
-		buf = vq->data[i];
+		buf = vq->desc_state[i].data;
 		detach_buf(vq, i);
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, virtio16_to_cpu(_vq->vdev, vq->vring.avail->idx) - 1);
 		END_USE(vq);
@@ -721,7 +837,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 		return NULL;
 	}
 
-	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
+	vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
+		     GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
@@ -751,11 +868,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-	for (i = 0; i < num-1; i++) {
+	for (i = 0; i < num-1; i++)
 		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-		vq->data[i] = NULL;
-	}
-	vq->data[i] = NULL;
+	memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
 
 	return &vq->vq;
 }
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
new file mode 100644
index 000000000000..4f93af89ae16
--- /dev/null
+++ b/tools/virtio/linux/dma-mapping.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_DMA_MAPPING_H
+#define _LINUX_DMA_MAPPING_H
+
+#ifdef CONFIG_HAS_DMA
+# error Virtio userspace code does not support CONFIG_HAS_DMA
+#endif
+
+#define PCI_DMA_BUS_IS_PHYS 1
+
+enum dma_data_direction {
+	DMA_BIDIRECTIONAL = 0,
+	DMA_TO_DEVICE = 1,
+	DMA_FROM_DEVICE = 2,
+	DMA_NONE = 3,
+};
+
+#endif
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 3/3] virtio_pci: Use the DMA API
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
                   ` (4 preceding siblings ...)
  2015-10-28  6:39 ` [PATCH v3 3/3] virtio_pci: Use the DMA API Andy Lutomirski
@ 2015-10-28  6:39 ` Andy Lutomirski
  2015-10-28  6:53   ` David Woodhouse
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, Michael S. Tsirkin,
	virtualization, Andy Lutomirski

This fixes virtio-pci on platforms and buses that have IOMMUs.  This
will break the experimental QEMU Q35 IOMMU support until QEMU is
fixed.  In exchange, it fixes physical virtio hardware as well as
virtio-pci running under Xen.

We should clean up the virtqueue API to do its own allocation and
teach virtqueue_get_avail and virtqueue_get_used to return DMA
addresses directly.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 drivers/virtio/virtio_pci_common.h |  3 ++-
 drivers/virtio/virtio_pci_legacy.c | 19 +++++++++++++++----
 drivers/virtio/virtio_pci_modern.c | 34 ++++++++++++++++++++++++----------
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index b976d968e793..cd6196b513ad 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -38,8 +38,9 @@ struct virtio_pci_vq_info {
 	/* the number of entries in the queue */
 	int num;
 
-	/* the virtual address of the ring queue */
+	/* the ring queue */
 	void *queue;
+	dma_addr_t queue_dma_addr;      /* bus address */
 
 	/* the list node for the virtqueues list */
 	struct list_head node;
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 48bc9797e530..b5293e5f2af4 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -135,12 +135,14 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	info->msix_vector = msix_vec;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
-	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
+	info->queue = dma_zalloc_coherent(&vp_dev->pci_dev->dev, size,
+					  &info->queue_dma_addr,
+					  GFP_KERNEL);
 	if (info->queue == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	/* activate the queue */
-	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
+	iowrite32(info->queue_dma_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
@@ -169,7 +171,8 @@ out_assign:
 	vring_del_virtqueue(vq);
 out_activate_queue:
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
-	free_pages_exact(info->queue, size);
+	dma_free_coherent(&vp_dev->pci_dev->dev, size,
+			  info->queue, info->queue_dma_addr);
 	return ERR_PTR(err);
 }
 
@@ -194,7 +197,8 @@ static void del_vq(struct virtio_pci_vq_info *info)
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
-	free_pages_exact(info->queue, size);
+	dma_free_coherent(&vp_dev->pci_dev->dev, size,
+			  info->queue, info->queue_dma_addr);
 }
 
 static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -227,6 +231,13 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
 		return -ENODEV;
 	}
 
+	rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
+	if (rc)
+		rc = dma_set_mask_and_coherent(&pci_dev->dev,
+						DMA_BIT_MASK(32));
+	if (rc)
+		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+
 	rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
 	if (rc)
 		return rc;
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8e5cf194cc0b..fbe0bd1c4881 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -293,14 +293,16 @@ static size_t vring_pci_size(u16 num)
 	return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
 }
 
-static void *alloc_virtqueue_pages(int *num)
+static void *alloc_virtqueue_pages(struct virtio_pci_device *vp_dev,
+				   int *num, dma_addr_t *dma_addr)
 {
 	void *pages;
 
 	/* TODO: allocate each queue chunk individually */
 	for (; *num && vring_pci_size(*num) > PAGE_SIZE; *num /= 2) {
-		pages = alloc_pages_exact(vring_pci_size(*num),
-					  GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
+		pages = dma_zalloc_coherent(
+			&vp_dev->pci_dev->dev, vring_pci_size(*num),
+			dma_addr, GFP_KERNEL|__GFP_NOWARN);
 		if (pages)
 			return pages;
 	}
@@ -309,7 +311,9 @@ static void *alloc_virtqueue_pages(int *num)
 		return NULL;
 
 	/* Try to get a single page. You are my only hope! */
-	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
+	return dma_zalloc_coherent(
+		&vp_dev->pci_dev->dev, vring_pci_size(*num),
+		dma_addr, GFP_KERNEL);
 }
 
 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@@ -346,7 +350,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	info->num = num;
 	info->msix_vector = msix_vec;
 
-	info->queue = alloc_virtqueue_pages(&info->num);
+	info->queue = alloc_virtqueue_pages(vp_dev, &info->num,
+					    &info->queue_dma_addr);
 	if (info->queue == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -361,11 +366,11 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 
 	/* activate the queue */
 	vp_iowrite16(num, &cfg->queue_size);
-	vp_iowrite64_twopart(virt_to_phys(info->queue),
+	vp_iowrite64_twopart(info->queue_dma_addr,
 			     &cfg->queue_desc_lo, &cfg->queue_desc_hi);
-	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
+	vp_iowrite64_twopart(info->queue_dma_addr + ((char *)virtqueue_get_avail(vq) - (char *)info->queue),
 			     &cfg->queue_avail_lo, &cfg->queue_avail_hi);
-	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
+	vp_iowrite64_twopart(info->queue_dma_addr + ((char *)virtqueue_get_used(vq) - (char *)info->queue),
 			     &cfg->queue_used_lo, &cfg->queue_used_hi);
 
 	if (vp_dev->notify_base) {
@@ -411,7 +416,8 @@ err_assign_vector:
 err_map_notify:
 	vring_del_virtqueue(vq);
 err_new_queue:
-	free_pages_exact(info->queue, vring_pci_size(info->num));
+	dma_free_coherent(&vp_dev->pci_dev->dev, vring_pci_size(info->num),
+			  info->queue, info->queue_dma_addr);
 	return ERR_PTR(err);
 }
 
@@ -457,7 +463,8 @@ static void del_vq(struct virtio_pci_vq_info *info)
 
 	vring_del_virtqueue(vq);
 
-	free_pages_exact(info->queue, vring_pci_size(info->num));
+	dma_free_coherent(&vp_dev->pci_dev->dev, vring_pci_size(info->num),
+			  info->queue, info->queue_dma_addr);
 }
 
 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
@@ -641,6 +648,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
 		return -EINVAL;
 	}
 
+	err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
+	if (err)
+		err = dma_set_mask_and_coherent(&pci_dev->dev,
+						DMA_BIT_MASK(32));
+	if (err)
+		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+
 	/* Device capability is only mandatory for devices that have
 	 * device-specific configuration.
 	 */
-- 
2.4.3


^ permalink raw reply related	[flat|nested] 110+ messages in thread

* [PATCH v3 3/3] virtio_pci: Use the DMA API
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
                   ` (3 preceding siblings ...)
  2015-10-28  6:38 ` Andy Lutomirski
@ 2015-10-28  6:39 ` Andy Lutomirski
  2015-10-28  6:39 ` Andy Lutomirski
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-s390, Joerg Roedel, KVM, Michael S. Tsirkin, benh,
	Sebastian Ott, virtualization, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, dwmw2, Christoph Hellwig,
	Martin Schwidefsky

This fixes virtio-pci on platforms and buses that have IOMMUs.  This
will break the experimental QEMU Q35 IOMMU support until QEMU is
fixed.  In exchange, it fixes physical virtio hardware as well as
virtio-pci running under Xen.

We should clean up the virtqueue API to do its own allocation and
teach virtqueue_get_avail and virtqueue_get_used to return DMA
addresses directly.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 drivers/virtio/virtio_pci_common.h |  3 ++-
 drivers/virtio/virtio_pci_legacy.c | 19 +++++++++++++++----
 drivers/virtio/virtio_pci_modern.c | 34 ++++++++++++++++++++++++----------
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index b976d968e793..cd6196b513ad 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -38,8 +38,9 @@ struct virtio_pci_vq_info {
 	/* the number of entries in the queue */
 	int num;
 
-	/* the virtual address of the ring queue */
+	/* the ring queue */
 	void *queue;
+	dma_addr_t queue_dma_addr;      /* bus address */
 
 	/* the list node for the virtqueues list */
 	struct list_head node;
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 48bc9797e530..b5293e5f2af4 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -135,12 +135,14 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	info->msix_vector = msix_vec;
 
 	size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
-	info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
+	info->queue = dma_zalloc_coherent(&vp_dev->pci_dev->dev, size,
+					  &info->queue_dma_addr,
+					  GFP_KERNEL);
 	if (info->queue == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	/* activate the queue */
-	iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
+	iowrite32(info->queue_dma_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
@@ -169,7 +171,8 @@ out_assign:
 	vring_del_virtqueue(vq);
 out_activate_queue:
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
-	free_pages_exact(info->queue, size);
+	dma_free_coherent(&vp_dev->pci_dev->dev, size,
+			  info->queue, info->queue_dma_addr);
 	return ERR_PTR(err);
 }
 
@@ -194,7 +197,8 @@ static void del_vq(struct virtio_pci_vq_info *info)
 	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
-	free_pages_exact(info->queue, size);
+	dma_free_coherent(&vp_dev->pci_dev->dev, size,
+			  info->queue, info->queue_dma_addr);
 }
 
 static const struct virtio_config_ops virtio_pci_config_ops = {
@@ -227,6 +231,13 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
 		return -ENODEV;
 	}
 
+	rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
+	if (rc)
+		rc = dma_set_mask_and_coherent(&pci_dev->dev,
+						DMA_BIT_MASK(32));
+	if (rc)
+		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+
 	rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
 	if (rc)
 		return rc;
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 8e5cf194cc0b..fbe0bd1c4881 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -293,14 +293,16 @@ static size_t vring_pci_size(u16 num)
 	return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
 }
 
-static void *alloc_virtqueue_pages(int *num)
+static void *alloc_virtqueue_pages(struct virtio_pci_device *vp_dev,
+				   int *num, dma_addr_t *dma_addr)
 {
 	void *pages;
 
 	/* TODO: allocate each queue chunk individually */
 	for (; *num && vring_pci_size(*num) > PAGE_SIZE; *num /= 2) {
-		pages = alloc_pages_exact(vring_pci_size(*num),
-					  GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
+		pages = dma_zalloc_coherent(
+			&vp_dev->pci_dev->dev, vring_pci_size(*num),
+			dma_addr, GFP_KERNEL|__GFP_NOWARN);
 		if (pages)
 			return pages;
 	}
@@ -309,7 +311,9 @@ static void *alloc_virtqueue_pages(int *num)
 		return NULL;
 
 	/* Try to get a single page. You are my only hope! */
-	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
+	return dma_zalloc_coherent(
+		&vp_dev->pci_dev->dev, vring_pci_size(*num),
+		dma_addr, GFP_KERNEL);
 }
 
 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@@ -346,7 +350,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	info->num = num;
 	info->msix_vector = msix_vec;
 
-	info->queue = alloc_virtqueue_pages(&info->num);
+	info->queue = alloc_virtqueue_pages(vp_dev, &info->num,
+					    &info->queue_dma_addr);
 	if (info->queue == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -361,11 +366,11 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 
 	/* activate the queue */
 	vp_iowrite16(num, &cfg->queue_size);
-	vp_iowrite64_twopart(virt_to_phys(info->queue),
+	vp_iowrite64_twopart(info->queue_dma_addr,
 			     &cfg->queue_desc_lo, &cfg->queue_desc_hi);
-	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
+	vp_iowrite64_twopart(info->queue_dma_addr + ((char *)virtqueue_get_avail(vq) - (char *)info->queue),
 			     &cfg->queue_avail_lo, &cfg->queue_avail_hi);
-	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
+	vp_iowrite64_twopart(info->queue_dma_addr + ((char *)virtqueue_get_used(vq) - (char *)info->queue),
 			     &cfg->queue_used_lo, &cfg->queue_used_hi);
 
 	if (vp_dev->notify_base) {
@@ -411,7 +416,8 @@ err_assign_vector:
 err_map_notify:
 	vring_del_virtqueue(vq);
 err_new_queue:
-	free_pages_exact(info->queue, vring_pci_size(info->num));
+	dma_free_coherent(&vp_dev->pci_dev->dev, vring_pci_size(info->num),
+			  info->queue, info->queue_dma_addr);
 	return ERR_PTR(err);
 }
 
@@ -457,7 +463,8 @@ static void del_vq(struct virtio_pci_vq_info *info)
 
 	vring_del_virtqueue(vq);
 
-	free_pages_exact(info->queue, vring_pci_size(info->num));
+	dma_free_coherent(&vp_dev->pci_dev->dev, vring_pci_size(info->num),
+			  info->queue, info->queue_dma_addr);
 }
 
 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
@@ -641,6 +648,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
 		return -EINVAL;
 	}
 
+	err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
+	if (err)
+		err = dma_set_mask_and_coherent(&pci_dev->dev,
+						DMA_BIT_MASK(32));
+	if (err)
+		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+
 	/* Device capability is only mandatory for devices that have
 	 * device-specific configuration.
 	 */
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
@ 2015-10-28  6:53   ` David Woodhouse
  2015-10-28  6:38 ` Andy Lutomirski
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28  6:53 UTC (permalink / raw)
  To: Andy Lutomirski, linux-kernel
  Cc: Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	Martin Schwidefsky, linux-s390, Michael S. Tsirkin,
	virtualization

[-- Attachment #1: Type: text/plain, Size: 839 bytes --]

On Tue, 2015-10-27 at 23:38 -0700, Andy Lutomirski wrote:
> 
> Changes from v2:
>  - Fix really embarrassing bug.  This version actually works.

So embarrassing you didn't want to tell us what it was? ...

--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -292,7 +292,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                        vq, desc, total_sg * sizeof(struct vring_desc),
                        DMA_TO_DEVICE);
 
-               if (vring_mapping_error(vq, vq->vring.desc[head].addr))
+               if (vring_mapping_error(vq, addr))
                        goto unmap_release;
 
                vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);

That wasn't going to be the reason for Christian's failure, was it?


-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  6:53   ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28  6:53 UTC (permalink / raw)
  To: Andy Lutomirski, linux-kernel
  Cc: linux-s390, Joerg Roedel, KVM, Michael S. Tsirkin, benh,
	Sebastian Ott, virtualization, Christian Borntraeger,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 839 bytes --]

On Tue, 2015-10-27 at 23:38 -0700, Andy Lutomirski wrote:
> 
> Changes from v2:
>  - Fix really embarrassing bug.  This version actually works.

So embarrassing you didn't want to tell us what it was? ...

--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -292,7 +292,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
                        vq, desc, total_sg * sizeof(struct vring_desc),
                        DMA_TO_DEVICE);
 
-               if (vring_mapping_error(vq, vq->vring.desc[head].addr))
+               if (vring_mapping_error(vq, addr))
                        goto unmap_release;
 
                vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);

That wasn't going to be the reason for Christian's failure, was it?


-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack
  2015-10-28  6:38 ` [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack Andy Lutomirski
  2015-10-28  7:08   ` Michael S. Tsirkin
@ 2015-10-28  7:08   ` Michael S. Tsirkin
  1 sibling, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28  7:08 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, virtualization,
	Andy Lutomirski

On Tue, Oct 27, 2015 at 11:38:58PM -0700, Andy Lutomirski wrote:
> From: Andy Lutomirski <luto@amacapital.net>
> 
> Once virtio starts using the DMA API, we won't be able to safely DMA
> from the stack.  virtio-net does a couple of config DMA requests
> from small stack buffers -- switch to using dynamically-allocated
> memory.
> 
> This should have no effect on any performance-critical code paths.
> 
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: virtualization@lists.linux-foundation.org
> Reviewed-by: Joerg Roedel <jroedel@suse.de>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Same issues as v2 (I only saw v3 now).
I've proposed an alternative patch.

> ---
> 
> Hi Michael and DaveM-
> 
> This is a prerequisite for the virtio DMA fixing project.  It works
> as a standalone patch, though.  Would it make sense to apply it to
> an appropriate networking tree now?
> 
> (This is unchanged from v2.)
> 
> drivers/net/virtio_net.c | 53 ++++++++++++++++++++++++++++++++----------------
>  1 file changed, 36 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index d8838dedb7a4..4f10f8a58811 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -976,31 +976,43 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  				 struct scatterlist *out)
>  {
>  	struct scatterlist *sgs[4], hdr, stat;
> -	struct virtio_net_ctrl_hdr ctrl;
> -	virtio_net_ctrl_ack status = ~0;
> +
> +	struct {
> +		struct virtio_net_ctrl_hdr ctrl;
> +		virtio_net_ctrl_ack status;
> +	} *buf;
> +
>  	unsigned out_num = 0, tmp;
> +	bool ret;
>  
>  	/* Caller should know better */
>  	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
>  
> -	ctrl.class = class;
> -	ctrl.cmd = cmd;
> +	buf = kmalloc(sizeof(*buf), GFP_ATOMIC);
> +	if (!buf)
> +		return false;
> +	buf->status = ~0;
> +
> +	buf->ctrl.class = class;
> +	buf->ctrl.cmd = cmd;
>  	/* Add header */
> -	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
> +	sg_init_one(&hdr, &buf->ctrl, sizeof(buf->ctrl));
>  	sgs[out_num++] = &hdr;
>  
>  	if (out)
>  		sgs[out_num++] = out;
>  
>  	/* Add return status. */
> -	sg_init_one(&stat, &status, sizeof(status));
> +	sg_init_one(&stat, &buf->status, sizeof(buf->status));
>  	sgs[out_num] = &stat;
>  
>  	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
>  	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
>  
> -	if (unlikely(!virtqueue_kick(vi->cvq)))
> -		return status == VIRTIO_NET_OK;
> +	if (unlikely(!virtqueue_kick(vi->cvq))) {
> +		ret = (buf->status == VIRTIO_NET_OK);
> +		goto out;
> +	}
>  
>  	/* Spin for a response, the kick causes an ioport write, trapping
>  	 * into the hypervisor, so the request should be handled immediately.
> @@ -1009,7 +1021,11 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  	       !virtqueue_is_broken(vi->cvq))
>  		cpu_relax();
>  
> -	return status == VIRTIO_NET_OK;
> +	ret = (buf->status == VIRTIO_NET_OK);
> +
> +out:
> +	kfree(buf);
> +	return ret;
>  }
>  
>  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> @@ -1151,7 +1167,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
>  	struct scatterlist sg[2];
> -	u8 promisc, allmulti;
> +	u8 *cmdbyte;
>  	struct virtio_net_ctrl_mac *mac_data;
>  	struct netdev_hw_addr *ha;
>  	int uc_count;
> @@ -1163,22 +1179,25 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>  	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
>  		return;
>  
> -	promisc = ((dev->flags & IFF_PROMISC) != 0);
> -	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
> +	cmdbyte = kmalloc(sizeof(*cmdbyte), GFP_ATOMIC);
> +	if (!cmdbyte)
> +		return;
>  
> -	sg_init_one(sg, &promisc, sizeof(promisc));
> +	sg_init_one(sg, cmdbyte, sizeof(*cmdbyte));
>  
> +	*cmdbyte = ((dev->flags & IFF_PROMISC) != 0);
>  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
>  				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
>  		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
> -			 promisc ? "en" : "dis");
> -
> -	sg_init_one(sg, &allmulti, sizeof(allmulti));
> +			 *cmdbyte ? "en" : "dis");
>  
> +	*cmdbyte = ((dev->flags & IFF_ALLMULTI) != 0);
>  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
>  				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
>  		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
> -			 allmulti ? "en" : "dis");
> +			 *cmdbyte ? "en" : "dis");
> +
> +	kfree(cmdbyte);
>  
>  	uc_count = netdev_uc_count(dev);
>  	mc_count = netdev_mc_count(dev);
> -- 
> 2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack
  2015-10-28  6:38 ` [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack Andy Lutomirski
@ 2015-10-28  7:08   ` Michael S. Tsirkin
  2015-10-28  7:08   ` Michael S. Tsirkin
  1 sibling, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28  7:08 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Paolo Bonzini,
	Andy Lutomirski, dwmw2, Christoph Hellwig, Martin Schwidefsky

On Tue, Oct 27, 2015 at 11:38:58PM -0700, Andy Lutomirski wrote:
> From: Andy Lutomirski <luto@amacapital.net>
> 
> Once virtio starts using the DMA API, we won't be able to safely DMA
> from the stack.  virtio-net does a couple of config DMA requests
> from small stack buffers -- switch to using dynamically-allocated
> memory.
> 
> This should have no effect on any performance-critical code paths.
> 
> Cc: "Michael S. Tsirkin" <mst@redhat.com>
> Cc: virtualization@lists.linux-foundation.org
> Reviewed-by: Joerg Roedel <jroedel@suse.de>
> Signed-off-by: Andy Lutomirski <luto@kernel.org>

Same issues as v2 (I only saw v3 now).
I've proposed an alternative patch.

> ---
> 
> Hi Michael and DaveM-
> 
> This is a prerequisite for the virtio DMA fixing project.  It works
> as a standalone patch, though.  Would it make sense to apply it to
> an appropriate networking tree now?
> 
> (This is unchanged from v2.)
> 
> drivers/net/virtio_net.c | 53 ++++++++++++++++++++++++++++++++----------------
>  1 file changed, 36 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index d8838dedb7a4..4f10f8a58811 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -976,31 +976,43 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  				 struct scatterlist *out)
>  {
>  	struct scatterlist *sgs[4], hdr, stat;
> -	struct virtio_net_ctrl_hdr ctrl;
> -	virtio_net_ctrl_ack status = ~0;
> +
> +	struct {
> +		struct virtio_net_ctrl_hdr ctrl;
> +		virtio_net_ctrl_ack status;
> +	} *buf;
> +
>  	unsigned out_num = 0, tmp;
> +	bool ret;
>  
>  	/* Caller should know better */
>  	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
>  
> -	ctrl.class = class;
> -	ctrl.cmd = cmd;
> +	buf = kmalloc(sizeof(*buf), GFP_ATOMIC);
> +	if (!buf)
> +		return false;
> +	buf->status = ~0;
> +
> +	buf->ctrl.class = class;
> +	buf->ctrl.cmd = cmd;
>  	/* Add header */
> -	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
> +	sg_init_one(&hdr, &buf->ctrl, sizeof(buf->ctrl));
>  	sgs[out_num++] = &hdr;
>  
>  	if (out)
>  		sgs[out_num++] = out;
>  
>  	/* Add return status. */
> -	sg_init_one(&stat, &status, sizeof(status));
> +	sg_init_one(&stat, &buf->status, sizeof(buf->status));
>  	sgs[out_num] = &stat;
>  
>  	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
>  	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
>  
> -	if (unlikely(!virtqueue_kick(vi->cvq)))
> -		return status == VIRTIO_NET_OK;
> +	if (unlikely(!virtqueue_kick(vi->cvq))) {
> +		ret = (buf->status == VIRTIO_NET_OK);
> +		goto out;
> +	}
>  
>  	/* Spin for a response, the kick causes an ioport write, trapping
>  	 * into the hypervisor, so the request should be handled immediately.
> @@ -1009,7 +1021,11 @@ static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
>  	       !virtqueue_is_broken(vi->cvq))
>  		cpu_relax();
>  
> -	return status == VIRTIO_NET_OK;
> +	ret = (buf->status == VIRTIO_NET_OK);
> +
> +out:
> +	kfree(buf);
> +	return ret;
>  }
>  
>  static int virtnet_set_mac_address(struct net_device *dev, void *p)
> @@ -1151,7 +1167,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
>  	struct scatterlist sg[2];
> -	u8 promisc, allmulti;
> +	u8 *cmdbyte;
>  	struct virtio_net_ctrl_mac *mac_data;
>  	struct netdev_hw_addr *ha;
>  	int uc_count;
> @@ -1163,22 +1179,25 @@ static void virtnet_set_rx_mode(struct net_device *dev)
>  	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
>  		return;
>  
> -	promisc = ((dev->flags & IFF_PROMISC) != 0);
> -	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
> +	cmdbyte = kmalloc(sizeof(*cmdbyte), GFP_ATOMIC);
> +	if (!cmdbyte)
> +		return;
>  
> -	sg_init_one(sg, &promisc, sizeof(promisc));
> +	sg_init_one(sg, cmdbyte, sizeof(*cmdbyte));
>  
> +	*cmdbyte = ((dev->flags & IFF_PROMISC) != 0);
>  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
>  				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
>  		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
> -			 promisc ? "en" : "dis");
> -
> -	sg_init_one(sg, &allmulti, sizeof(allmulti));
> +			 *cmdbyte ? "en" : "dis");
>  
> +	*cmdbyte = ((dev->flags & IFF_ALLMULTI) != 0);
>  	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
>  				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
>  		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
> -			 allmulti ? "en" : "dis");
> +			 *cmdbyte ? "en" : "dis");
> +
> +	kfree(cmdbyte);
>  
>  	uc_count = netdev_uc_count(dev);
>  	mc_count = netdev_mc_count(dev);
> -- 
> 2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  6:53   ` David Woodhouse
  (?)
@ 2015-10-28  7:09   ` Andy Lutomirski
  -1 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  7:09 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andy Lutomirski, linux-kernel, Joerg Roedel,
	Christian Borntraeger, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Michael S. Tsirkin,
	Linux Virtualization

On Tue, Oct 27, 2015 at 11:53 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Tue, 2015-10-27 at 23:38 -0700, Andy Lutomirski wrote:
>>
>> Changes from v2:
>>  - Fix really embarrassing bug.  This version actually works.
>
> So embarrassing you didn't want to tell us what it was? ...

Shhh, it's a secret!

I somehow managed to test-boot a different kernel than I thought I was booting.

>
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -292,7 +292,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>                         vq, desc, total_sg * sizeof(struct vring_desc),
>                         DMA_TO_DEVICE);
>
> -               if (vring_mapping_error(vq, vq->vring.desc[head].addr))
> +               if (vring_mapping_error(vq, addr))
>                         goto unmap_release;
>
>                 vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
>
> That wasn't going to be the reason for Christian's failure, was it?
>

Not obviously, but it's possible.  Now that I'm staring at it, I have
some more big-endian issues, so there'll be a v4.  I'll also play with
Michael's thing.  Expect a long delay, though -- my flight's about to
leave.

The readme notwithstanding, virtme (https://github.com/amluto/virtme)
actually has s390x support, so I can try to debug when I get home.
I'm not about to try doing this on a laptop :)

--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  6:53   ` David Woodhouse
  (?)
  (?)
@ 2015-10-28  7:09   ` Andy Lutomirski
  -1 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  7:09 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, Michael S. Tsirkin,
	Benjamin Herrenschmidt, Sebastian Ott, linux-kernel,
	Linux Virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky

On Tue, Oct 27, 2015 at 11:53 PM, David Woodhouse <dwmw2@infradead.org> wrote:
> On Tue, 2015-10-27 at 23:38 -0700, Andy Lutomirski wrote:
>>
>> Changes from v2:
>>  - Fix really embarrassing bug.  This version actually works.
>
> So embarrassing you didn't want to tell us what it was? ...

Shhh, it's a secret!

I somehow managed to test-boot a different kernel than I thought I was booting.

>
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -292,7 +292,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>                         vq, desc, total_sg * sizeof(struct vring_desc),
>                         DMA_TO_DEVICE);
>
> -               if (vring_mapping_error(vq, vq->vring.desc[head].addr))
> +               if (vring_mapping_error(vq, addr))
>                         goto unmap_release;
>
>                 vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
>
> That wasn't going to be the reason for Christian's failure, was it?
>

Not obviously, but it's possible.  Now that I'm staring at it, I have
some more big-endian issues, so there'll be a v4.  I'll also play with
Michael's thing.  Expect a long delay, though -- my flight's about to
leave.

The readme notwithstanding, virtme (https://github.com/amluto/virtme)
actually has s390x support, so I can try to debug when I get home.
I'm not about to try doing this on a laptop :)

--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
@ 2015-10-28  7:17   ` Michael S. Tsirkin
  2015-10-28  6:38 ` Andy Lutomirski
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28  7:17 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, virtualization

On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> This switches virtio to use the DMA API unconditionally.  I'm sure
> it breaks things, but it seems to work on x86 using virtio-pci, with
> and without Xen, and using both the modern 1.0 variant and the
> legacy variant.

I'm very glad to see work on this making progress.

I suspect we'll have to find a way to make this optional though, and
keep doing the non-DMA API thing with old devices.  And I've been
debating with myself whether a pci specific thing or a feature bit is
preferable.

Thoughts?

> Changes from v2:
>  - Fix really embarrassing bug.  This version actually works.
> 
> Changes from v1:
>  - Fix an endian conversion error causing a BUG to hit.
>  - Fix a DMA ordering issue (swiotlb=force works now).
>  - Minor cleanups.
> 
> Andy Lutomirski (3):
>   virtio_net: Stop doing DMA from the stack
>   virtio_ring: Support DMA APIs
>   virtio_pci: Use the DMA API
> 
>  drivers/net/virtio_net.c           |  53 +++++++----
>  drivers/virtio/Kconfig             |   2 +-
>  drivers/virtio/virtio_pci_common.h |   3 +-
>  drivers/virtio/virtio_pci_legacy.c |  19 +++-
>  drivers/virtio/virtio_pci_modern.c |  34 +++++--
>  drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
>  tools/virtio/linux/dma-mapping.h   |  17 ++++
>  7 files changed, 246 insertions(+), 69 deletions(-)
>  create mode 100644 tools/virtio/linux/dma-mapping.h
> 
> -- 
> 2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  7:17   ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28  7:17 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Paolo Bonzini, dwmw2,
	Christoph Hellwig, Martin Schwidefsky

On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> This switches virtio to use the DMA API unconditionally.  I'm sure
> it breaks things, but it seems to work on x86 using virtio-pci, with
> and without Xen, and using both the modern 1.0 variant and the
> legacy variant.

I'm very glad to see work on this making progress.

I suspect we'll have to find a way to make this optional though, and
keep doing the non-DMA API thing with old devices.  And I've been
debating with myself whether a pci specific thing or a feature bit is
preferable.

Thoughts?

> Changes from v2:
>  - Fix really embarrassing bug.  This version actually works.
> 
> Changes from v1:
>  - Fix an endian conversion error causing a BUG to hit.
>  - Fix a DMA ordering issue (swiotlb=force works now).
>  - Minor cleanups.
> 
> Andy Lutomirski (3):
>   virtio_net: Stop doing DMA from the stack
>   virtio_ring: Support DMA APIs
>   virtio_pci: Use the DMA API
> 
>  drivers/net/virtio_net.c           |  53 +++++++----
>  drivers/virtio/Kconfig             |   2 +-
>  drivers/virtio/virtio_pci_common.h |   3 +-
>  drivers/virtio/virtio_pci_legacy.c |  19 +++-
>  drivers/virtio/virtio_pci_modern.c |  34 +++++--
>  drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
>  tools/virtio/linux/dma-mapping.h   |  17 ++++
>  7 files changed, 246 insertions(+), 69 deletions(-)
>  create mode 100644 tools/virtio/linux/dma-mapping.h
> 
> -- 
> 2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  7:17   ` Michael S. Tsirkin
@ 2015-10-28  7:40     ` Christian Borntraeger
  -1 siblings, 0 replies; 110+ messages in thread
From: Christian Borntraeger @ 2015-10-28  7:40 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, benh, KVM, dwmw2,
	Martin Schwidefsky, linux-s390, virtualization

Am 28.10.2015 um 16:17 schrieb Michael S. Tsirkin:
> On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
>> This switches virtio to use the DMA API unconditionally.  I'm sure
>> it breaks things, but it seems to work on x86 using virtio-pci, with
>> and without Xen, and using both the modern 1.0 variant and the
>> legacy variant.
> 
> I'm very glad to see work on this making progress.
> 
> I suspect we'll have to find a way to make this optional though, and
> keep doing the non-DMA API thing with old devices.  And I've been
> debating with myself whether a pci specific thing or a feature bit is
> preferable.
> 

We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
code in addition to also make it possible to mark devices as iommu bypass (IIRC,
via device tree, Ben?)

Christian



> Thoughts?
> 
>> Changes from v2:
>>  - Fix really embarrassing bug.  This version actually works.
>>
>> Changes from v1:
>>  - Fix an endian conversion error causing a BUG to hit.
>>  - Fix a DMA ordering issue (swiotlb=force works now).
>>  - Minor cleanups.
>>
>> Andy Lutomirski (3):
>>   virtio_net: Stop doing DMA from the stack
>>   virtio_ring: Support DMA APIs
>>   virtio_pci: Use the DMA API
>>
>>  drivers/net/virtio_net.c           |  53 +++++++----
>>  drivers/virtio/Kconfig             |   2 +-
>>  drivers/virtio/virtio_pci_common.h |   3 +-
>>  drivers/virtio/virtio_pci_legacy.c |  19 +++-
>>  drivers/virtio/virtio_pci_modern.c |  34 +++++--
>>  drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
>>  tools/virtio/linux/dma-mapping.h   |  17 ++++
>>  7 files changed, 246 insertions(+), 69 deletions(-)
>>  create mode 100644 tools/virtio/linux/dma-mapping.h
>>
>> -- 
>> 2.4.3
> 


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  7:40     ` Christian Borntraeger
  0 siblings, 0 replies; 110+ messages in thread
From: Christian Borntraeger @ 2015-10-28  7:40 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Martin Schwidefsky, Paolo Bonzini, dwmw2,
	Christoph Hellwig

Am 28.10.2015 um 16:17 schrieb Michael S. Tsirkin:
> On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
>> This switches virtio to use the DMA API unconditionally.  I'm sure
>> it breaks things, but it seems to work on x86 using virtio-pci, with
>> and without Xen, and using both the modern 1.0 variant and the
>> legacy variant.
> 
> I'm very glad to see work on this making progress.
> 
> I suspect we'll have to find a way to make this optional though, and
> keep doing the non-DMA API thing with old devices.  And I've been
> debating with myself whether a pci specific thing or a feature bit is
> preferable.
> 

We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
code in addition to also make it possible to mark devices as iommu bypass (IIRC,
via device tree, Ben?)

Christian



> Thoughts?
> 
>> Changes from v2:
>>  - Fix really embarrassing bug.  This version actually works.
>>
>> Changes from v1:
>>  - Fix an endian conversion error causing a BUG to hit.
>>  - Fix a DMA ordering issue (swiotlb=force works now).
>>  - Minor cleanups.
>>
>> Andy Lutomirski (3):
>>   virtio_net: Stop doing DMA from the stack
>>   virtio_ring: Support DMA APIs
>>   virtio_pci: Use the DMA API
>>
>>  drivers/net/virtio_net.c           |  53 +++++++----
>>  drivers/virtio/Kconfig             |   2 +-
>>  drivers/virtio/virtio_pci_common.h |   3 +-
>>  drivers/virtio/virtio_pci_legacy.c |  19 +++-
>>  drivers/virtio/virtio_pci_modern.c |  34 +++++--
>>  drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
>>  tools/virtio/linux/dma-mapping.h   |  17 ++++
>>  7 files changed, 246 insertions(+), 69 deletions(-)
>>  create mode 100644 tools/virtio/linux/dma-mapping.h
>>
>> -- 
>> 2.4.3
> 

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  7:40     ` Christian Borntraeger
@ 2015-10-28  8:09       ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28  8:09 UTC (permalink / raw)
  To: Christian Borntraeger, Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, benh, KVM, Martin Schwidefsky,
	linux-s390, virtualization

[-- Attachment #1: Type: text/plain, Size: 1390 bytes --]

On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> Am 28.10.2015 um 16:17 schrieb Michael S. Tsirkin:
> > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > and without Xen, and using both the modern 1.0 variant and the
> > > legacy variant.
> > 
> > I'm very glad to see work on this making progress.
> > 
> > I suspect we'll have to find a way to make this optional though, and
> > keep doing the non-DMA API thing with old devices.  And I've been
> > debating with myself whether a pci specific thing or a feature bit is
> > preferable.
> > 
> 
> We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> via device tree, Ben?)

Right. You never eschew the DMA API in the *driver* — you just expect
the DMA API to do the right thing for devices which don't need
translation (with platforms using per-device dma_ops and generally
getting their act together).

We're pushing that on the platforms where it's currently an issue,
including Power, SPARC and S390.

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  8:09       ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28  8:09 UTC (permalink / raw)
  To: Christian Borntraeger, Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Martin Schwidefsky, Paolo Bonzini,
	Christoph Hellwig


[-- Attachment #1.1: Type: text/plain, Size: 1390 bytes --]

On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> Am 28.10.2015 um 16:17 schrieb Michael S. Tsirkin:
> > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > and without Xen, and using both the modern 1.0 variant and the
> > > legacy variant.
> > 
> > I'm very glad to see work on this making progress.
> > 
> > I suspect we'll have to find a way to make this optional though, and
> > keep doing the non-DMA API thing with old devices.  And I've been
> > debating with myself whether a pci specific thing or a feature bit is
> > preferable.
> > 
> 
> We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> via device tree, Ben?)

Right. You never eschew the DMA API in the *driver* — you just expect
the DMA API to do the right thing for devices which don't need
translation (with platforms using per-device dma_ops and generally
getting their act together).

We're pushing that on the platforms where it's currently an issue,
including Power, SPARC and S390.

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  7:40     ` Christian Borntraeger
@ 2015-10-28  8:36       ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 110+ messages in thread
From: Benjamin Herrenschmidt @ 2015-10-28  8:36 UTC (permalink / raw)
  To: Christian Borntraeger, Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, KVM, dwmw2, Martin Schwidefsky,
	linux-s390, virtualization

On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> via device tree, Ben?)

Something like that yes. I'll look into it when I'm back home.

Cheers,
Ben.


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  8:36       ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 110+ messages in thread
From: Benjamin Herrenschmidt @ 2015-10-28  8:36 UTC (permalink / raw)
  To: Christian Borntraeger, Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, Sebastian Ott, linux-kernel,
	virtualization, Martin Schwidefsky, Paolo Bonzini, dwmw2,
	Christoph Hellwig

On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> via device tree, Ben?)

Something like that yes. I'll look into it when I'm back home.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  8:36       ` Benjamin Herrenschmidt
@ 2015-10-28 11:23         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 11:23 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, KVM, dwmw2, Martin Schwidefsky, linux-s390,
	virtualization

On Wed, Oct 28, 2015 at 05:36:53PM +0900, Benjamin Herrenschmidt wrote:
> On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> > s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> > code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> > via device tree, Ben?)
> 
> Something like that yes. I'll look into it when I'm back home.
> 
> Cheers,
> Ben.

OK so I guess that means we should prefer a transport-specific
interface in virtio-pci then.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 11:23         ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 11:23 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-s390, Joerg Roedel, KVM, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, dwmw2, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 05:36:53PM +0900, Benjamin Herrenschmidt wrote:
> On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> > s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> > code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> > via device tree, Ben?)
> 
> Something like that yes. I'll look into it when I'm back home.
> 
> Cheers,
> Ben.

OK so I guess that means we should prefer a transport-specific
interface in virtio-pci then.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  8:09       ` David Woodhouse
@ 2015-10-28 11:35         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 11:35 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

On Wed, Oct 28, 2015 at 05:09:47PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > Am 28.10.2015 um 16:17 schrieb Michael S. Tsirkin:
> > > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > > and without Xen, and using both the modern 1.0 variant and the
> > > > legacy variant.
> > > 
> > > I'm very glad to see work on this making progress.
> > > 
> > > I suspect we'll have to find a way to make this optional though, and
> > > keep doing the non-DMA API thing with old devices.  And I've been
> > > debating with myself whether a pci specific thing or a feature bit is
> > > preferable.
> > > 
> > 
> > We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> > s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> > code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> > via device tree, Ben?)
> 
> Right. You never eschew the DMA API in the *driver* — you just expect
> the DMA API to do the right thing for devices which don't need
> translation (with platforms using per-device dma_ops and generally
> getting their act together).
> We're pushing that on the platforms where it's currently an issue,
> including Power, SPARC and S390.
> 
> -- 
> dwmw2
> 
> 

Well APIs are just that - internal kernel APIs.
If the only user of an API is virtio, we can stick the
code in virtio.h just as well.
I think controlling this dynamically and not statically
in e.g. devicetree is important though.

E.g. on intel x86, there's an option iommu=pt which does the 1:1
thing for devices when used by kernel, but enables
the iommu if used by userspace/VMs.

Something like this would be needed for other platforms IMHO.

And given that
1. virtio seems the only user so far
2. supporting this per device seems like something that
   might become useful in the future
maybe we'd better make this part of virtio transports.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 11:35         ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 11:35 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 05:09:47PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > Am 28.10.2015 um 16:17 schrieb Michael S. Tsirkin:
> > > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > > and without Xen, and using both the modern 1.0 variant and the
> > > > legacy variant.
> > > 
> > > I'm very glad to see work on this making progress.
> > > 
> > > I suspect we'll have to find a way to make this optional though, and
> > > keep doing the non-DMA API thing with old devices.  And I've been
> > > debating with myself whether a pci specific thing or a feature bit is
> > > preferable.
> > > 
> > 
> > We have discussed that at kernel summit. I will try to implement a dummy dma_ops for
> > s390 that does 1:1 mapping and Ben will look into doing some quirk to handle "old"
> > code in addition to also make it possible to mark devices as iommu bypass (IIRC,
> > via device tree, Ben?)
> 
> Right. You never eschew the DMA API in the *driver* — you just expect
> the DMA API to do the right thing for devices which don't need
> translation (with platforms using per-device dma_ops and generally
> getting their act together).
> We're pushing that on the platforms where it's currently an issue,
> including Power, SPARC and S390.
> 
> -- 
> dwmw2
> 
> 

Well APIs are just that - internal kernel APIs.
If the only user of an API is virtio, we can stick the
code in virtio.h just as well.
I think controlling this dynamically and not statically
in e.g. devicetree is important though.

E.g. on intel x86, there's an option iommu=pt which does the 1:1
thing for devices when used by kernel, but enables
the iommu if used by userspace/VMs.

Something like this would be needed for other platforms IMHO.

And given that
1. virtio seems the only user so far
2. supporting this per device seems like something that
   might become useful in the future
maybe we'd better make this part of virtio transports.

-- 
MST
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 11:35         ` Michael S. Tsirkin
@ 2015-10-28 13:35           ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 13:35 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

[-- Attachment #1: Type: text/plain, Size: 762 bytes --]

On Wed, 2015-10-28 at 13:35 +0200, Michael S. Tsirkin wrote:
> E.g. on intel x86, there's an option iommu=pt which does the 1:1
> thing for devices when used by kernel, but enables
> the iommu if used by userspace/VMs.

That's none of your business.

You call the DMA API when you do DMA. That's all there is to it.

If the IOMMU happens to be in passthrough mode, or your device happens
not to be routed through an IOMMU today, then the I/O virtual address
you get back from the DMA API will look a *lot* like the physical
address you asked the DMA to map. You might think there's no IOMMU. We
couldn't possibly comment.

Use the DMA API. Always. Let the platform worry about whether it
actually needs to *do* anything or not.

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 13:35           ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 13:35 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 762 bytes --]

On Wed, 2015-10-28 at 13:35 +0200, Michael S. Tsirkin wrote:
> E.g. on intel x86, there's an option iommu=pt which does the 1:1
> thing for devices when used by kernel, but enables
> the iommu if used by userspace/VMs.

That's none of your business.

You call the DMA API when you do DMA. That's all there is to it.

If the IOMMU happens to be in passthrough mode, or your device happens
not to be routed through an IOMMU today, then the I/O virtual address
you get back from the DMA API will look a *lot* like the physical
address you asked the DMA to map. You might think there's no IOMMU. We
couldn't possibly comment.

Use the DMA API. Always. Let the platform worry about whether it
actually needs to *do* anything or not.

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 11:23         ` Michael S. Tsirkin
@ 2015-10-28 13:37           ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 13:37 UTC (permalink / raw)
  To: Michael S. Tsirkin, Benjamin Herrenschmidt
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, KVM, Martin Schwidefsky, linux-s390,
	virtualization

[-- Attachment #1: Type: text/plain, Size: 762 bytes --]

On Wed, 2015-10-28 at 13:23 +0200, Michael S. Tsirkin wrote:
> On Wed, Oct 28, 2015 at 05:36:53PM +0900, Benjamin Herrenschmidt
> wrote:
> > On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > > We have discussed that at kernel summit. I will try to implement
> > > a dummy dma_ops for
> > > s390 that does 1:1 mapping and Ben will look into doing some
> > > quirk to handle "old"
> > > code in addition to also make it possible to mark devices as
> > > iommu bypass (IIRC,
> > > via device tree, Ben?)
> > 
> > Something like that yes. I'll look into it when I'm back home.
> > 
> > Cheers,
> > Ben.
> 
> OK so I guess that means we should prefer a transport-specific
> interface in virtio-pci then.

Why?

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 13:37           ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 13:37 UTC (permalink / raw)
  To: Michael S. Tsirkin, Benjamin Herrenschmidt
  Cc: linux-s390, Joerg Roedel, KVM, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 762 bytes --]

On Wed, 2015-10-28 at 13:23 +0200, Michael S. Tsirkin wrote:
> On Wed, Oct 28, 2015 at 05:36:53PM +0900, Benjamin Herrenschmidt
> wrote:
> > On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > > We have discussed that at kernel summit. I will try to implement
> > > a dummy dma_ops for
> > > s390 that does 1:1 mapping and Ben will look into doing some
> > > quirk to handle "old"
> > > code in addition to also make it possible to mark devices as
> > > iommu bypass (IIRC,
> > > via device tree, Ben?)
> > 
> > Something like that yes. I'll look into it when I'm back home.
> > 
> > Cheers,
> > Ben.
> 
> OK so I guess that means we should prefer a transport-specific
> interface in virtio-pci then.

Why?

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 13:35           ` David Woodhouse
@ 2015-10-28 14:05             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 14:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

On Wed, Oct 28, 2015 at 10:35:27PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 13:35 +0200, Michael S. Tsirkin wrote:
> > E.g. on intel x86, there's an option iommu=pt which does the 1:1
> > thing for devices when used by kernel, but enables
> > the iommu if used by userspace/VMs.
> 
> That's none of your business.
> 
> You call the DMA API when you do DMA. That's all there is to it.
> 
> If the IOMMU happens to be in passthrough mode, or your device happens
> to not to be routed through an IOMMU today, then I/O virtual address
> you get back from the DMA API will look a *lot* like the physical
> address you asked the DMA to map. You might think there's no IOMMU. We
> couldn't possibly comment.
> 
> Use the DMA API. Always. Let the platform worry about whether it
> actually needs to *do* anything or not.
> -- 
> dwmw2
> 
> 

Short answer - platforms need a way to discover, and express different
security requirements of different devices.  If they continue to lack
that, we'll need a custom API in virtio, and while this seems a bit less
elegant, I would not see that as the end of the world at all, there are
not that many virtio drivers.

And hey - that's just an internal API.  We can change it later at a whim.

Long answer - PV is weird. It's not always the same as real hardware.

For PV, it's generally hypervisor doing writes into memory.

If it's monolithic with device emulation in same memory space as the
hypervisor (e.g. in the case of the current QEMU, or using vhost in host
kernel), then you gain *no security* by "restricting" it by means of the
IOMMU - the IOMMU is part of the same hypervisor.

If it is modular with device emulation in a separate memory space (e.g.
in case of Xen, or vhost-user in modern QEMU) then you do gain security:
the part emulating the IOMMU limits the part doing DMA.

In both cases for assigned devices, it is always modular in a sense, so
you do gain security since that is restricted by the hardware IOMMU.

The way things are set up at the moment, it's mostly global,
with iommu=pt on intel being a kind of exception.
We need host/guest and API interfaces that are more nuanced than that.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 14:05             ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 14:05 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 10:35:27PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 13:35 +0200, Michael S. Tsirkin wrote:
> > E.g. on intel x86, there's an option iommu=pt which does the 1:1
> > thing for devices when used by kernel, but enables
> > the iommu if used by userspace/VMs.
> 
> That's none of your business.
> 
> You call the DMA API when you do DMA. That's all there is to it.
> 
> If the IOMMU happens to be in passthrough mode, or your device happens
> to not to be routed through an IOMMU today, then I/O virtual address
> you get back from the DMA API will look a *lot* like the physical
> address you asked the DMA to map. You might think there's no IOMMU. We
> couldn't possibly comment.
> 
> Use the DMA API. Always. Let the platform worry about whether it
> actually needs to *do* anything or not.
> -- 
> dwmw2
> 
> 

Short answer - platforms need a way to discover, and express different
security requirements of different devices.  If they continue to lack
that, we'll need a custom API in virtio, and while this seems a bit less
elegant, I would not see that as the end of the world at all, there are
not that many virtio drivers.

And hey - that's just an internal API.  We can change it later at a whim.

Long answer - PV is weird. It's not always the same as real hardware.

For PV, it's generally hypervisor doing writes into memory.

If it's monolithic with device emulation in same memory space as the
hypervisor (e.g. in the case of the current QEMU, or using vhost in host
kernel), then you gain *no security* by "restricting" it by means of the
IOMMU - the IOMMU is part of the same hypervisor.

If it is modular with device emulation in a separate memory space (e.g.
in case of Xen, or vhost-user in modern QEMU) then you do gain security:
the part emulating the IOMMU limits the part doing DMA.

In both cases for assigned devices, it is always modular in a sense, so
you do gain security since that is restricted by the hardware IOMMU.

The way things are set up at the moment, it's mostly global,
with iommu=pt on intel being a kind of exception.
We need host/guest and API interfaces that are more nuanced than that.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 13:37           ` David Woodhouse
@ 2015-10-28 14:07             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 14:07 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Andy Lutomirski,
	linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, KVM, Martin Schwidefsky,
	linux-s390, virtualization

On Wed, Oct 28, 2015 at 10:37:56PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 13:23 +0200, Michael S. Tsirkin wrote:
> > On Wed, Oct 28, 2015 at 05:36:53PM +0900, Benjamin Herrenschmidt
> > wrote:
> > > On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > > > We have discussed that at kernel summit. I will try to implement
> > > > a dummy dma_ops for
> > > > s390 that does 1:1 mapping and Ben will look into doing some
> > > > quirk to handle "old"
> > > > code in addition to also make it possible to mark devices as
> > > > iommu bypass (IIRC,
> > > > via device tree, Ben?)
> > > 
> > > Something like that yes. I'll look into it when I'm back home.
> > > 
> > > Cheers,
> > > Ben.
> > 
> > OK so I guess that means we should prefer a transport-specific
> > interface in virtio-pci then.
> 
> Why?

Because you said you are doing something device tree specific for ARM,
aren't you?

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 14:07             ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 14:07 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, virtualization,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 10:37:56PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 13:23 +0200, Michael S. Tsirkin wrote:
> > On Wed, Oct 28, 2015 at 05:36:53PM +0900, Benjamin Herrenschmidt
> > wrote:
> > > On Wed, 2015-10-28 at 16:40 +0900, Christian Borntraeger wrote:
> > > > We have discussed that at kernel summit. I will try to implement
> > > > a dummy dma_ops for
> > > > s390 that does 1:1 mapping and Ben will look into doing some
> > > > quirk to handle "old"
> > > > code in addition to also make it possible to mark devices as
> > > > iommu bypass (IIRC,
> > > > via device tree, Ben?)
> > > 
> > > Something like that yes. I'll look into it when I'm back home.
> > > 
> > > Cheers,
> > > Ben.
> > 
> > OK so I guess that means we should prefer a transport-specific
> > interface in virtio-pci then.
> 
> Why?

Because you said you are doing something device tree specific for ARM,
aren't you?

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 14:05             ` Michael S. Tsirkin
@ 2015-10-28 14:13               ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 14:13 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

[-- Attachment #1: Type: text/plain, Size: 1159 bytes --]

On Wed, 2015-10-28 at 16:05 +0200, Michael S. Tsirkin wrote:
> 
> Short answer - platforms need a way to discover, and express different
> security requirements of different devices.

Sure. PLATFORMS need that. Do not let it go anywhere near your device
drivers. Including the virtio drivers.

> If they continue to lack that, we'll need a custom API in virtio,
> and while this seems a bit less elegant, I would not see that as
> the end of the world at all, there are not that many virtio drivers.

No. If they continue to lack that, we fix them. This is a *platform*
issue. The DMA API shall do the right thing. Do not second-guess it.


 (From the other mail)
> > > OK so I guess that means we should prefer a transport-specific
> > > interface in virtio-pci then.
> >
> > Why?
> 
> Because you said you are doing something device tree specific for 
> ARM, aren't you?

Nonono. The ARM platform code might do that, and the DMA API on ARM
*might* give you I/O virtual addresses that look a lot like the
physical addresses you asked it to map. That's none of your business.
Drivers use DMA API. No more talky.

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 14:13               ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 14:13 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 1159 bytes --]

On Wed, 2015-10-28 at 16:05 +0200, Michael S. Tsirkin wrote:
> 
> Short answer - platforms need a way to discover, and express different
> security requirements of different devices.

Sure. PLATFORMS need that. Do not let it go anywhere near your device
drivers. Including the virtio drivers.

> If they continue to lack that, we'll need a custom API in virtio,
> and while this seems a bit less elegant, I would not see that as
> the end of the world at all, there are not that many virtio drivers.

No. If they continue to lack that, we fix them. This is a *platform*
issue. The DMA API shall do the right thing. Do not second-guess it.


 (From the other mail)
> > > OK so I guess that means we should prefer a transport-specific
> > > interface in virtio-pci then.
> >
> > Why?
> 
> Because you said you are doing something device tree specific for 
> ARM, aren't you?

Nonono. The ARM platform code might do that, and the DMA API on ARM
*might* give you I/O virtual addresses that look a lot like the
physical addresses you asked it to map. That's none of your business.
Drivers use DMA API. No more talky.

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 14:13               ` David Woodhouse
@ 2015-10-28 14:22                 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 14:22 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

On Wed, Oct 28, 2015 at 11:13:29PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 16:05 +0200, Michael S. Tsirkin wrote:
> > 
> > Short answer - platforms need a way to discover, and express different
> > security requirements of different devices.
> 
> Sure. PLATFORMS need that. Do not let it go anywhere near your device
> drivers. Including the virtio drivers.

But would there be any users of this outside the virtio subsystem?
If no, maybe virtio core is a logical place to keep this.

> > If they continue to lack that, we'll need a custom API in virtio,
> > and while this seems a bit less elegant, I would not see that as
> > the end of the world at all, there are not that many virtio drivers.
> 
> No. If they continue to lack that, we fix them. This is a *platform*
> issue. The DMA API shall do the right thing. Do not second-guess it.
> 
> 
>  (From the other mail)

I don't have a problem with extending DMA API to address
more usecases.

> > > > OK so I guess that means we should prefer a transport-specific
> > > > interface in virtio-pci then.
> > >
> > > Why?
> > 
> > Because you said you are doing something device tree specific for 
> > ARM, aren't you?
> 
> Nonono. The ARM platform code might do that, and the DMA API on ARM
> *might* give you I/O virtual addresses that look a lot like the
> physical addresses you asked it to map. That's none of your business.
> Drivers use DMA API. No more talky.

Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
for the vast majority of users, so I can't switch them over
until the DMA API actually addresses all existing usecases.


> -- 
> dwmw2
> 
> 



^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 14:22                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 14:22 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 11:13:29PM +0900, David Woodhouse wrote:
> On Wed, 2015-10-28 at 16:05 +0200, Michael S. Tsirkin wrote:
> > 
> > Short answer - platforms need a way to discover, and express different
> > security requirements of different devices.
> 
> Sure. PLATFORMS need that. Do not let it go anywhere near your device
> drivers. Including the virtio drivers.

But would there be any users of this outside the virtio subsystem?
If no, maybe virtio core is a logical place to keep this.

> > If they continue to lack that, we'll need a custom API in virtio,
> > and while this seems a bit less elegant, I would not see that as
> > the end of the world at all, there are not that many virtio drivers.
> 
> No. If they continue to lack that, we fix them. This is a *platform*
> issue. The DMA API shall do the right thing. Do not second-guess it.
> 
> 
>  (From the other mail)

I don't have a problem with extending DMA API to address
more usecases.

> > > > OK so I guess that means we should prefer a transport-specific
> > > > interface in virtio-pci then.
> > >
> > > Why?
> > 
> > Because you said you are doing something device tree specific for 
> > ARM, aren't you?
> 
> Nonono. The ARM platform code might do that, and the DMA API on ARM
> *might* give you I/O virtual addresses that look a lot like the
> physical addresses you asked it to map. That's none of your business.
> Drivers use DMA API. No more talky.

Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
for the vast majority of users, so I can't switch them over
until the DMA API actually addresses all existing usecases.


> -- 
> dwmw2
> 
> 

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 14:22                 ` Michael S. Tsirkin
@ 2015-10-28 14:32                   ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 14:32 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

[-- Attachment #1: Type: text/plain, Size: 2156 bytes --]

On Wed, 2015-10-28 at 16:22 +0200, Michael S. Tsirkin wrote:
> On Wed, Oct 28, 2015 at 11:13:29PM +0900, David Woodhouse wrote:
> > On Wed, 2015-10-28 at 16:05 +0200, Michael S. Tsirkin wrote:
> > > 
> > > Short answer - platforms need a way to discover, and express
> > > different
> > > security requirements of different devices.
> > 
> > Sure. PLATFORMS need that. Do not let it go anywhere near your
> > device
> > drivers. Including the virtio drivers.
> 
> But would there be any users of this outside the virtio subsystem?
> If no, maybe virtio core is a logical place to keep this.

Users of what? DMA API ops which basically do nothing? Sure — there are
*plenty* of cases where there isn't actually an IOMMU in active use and
the DMA API just returns the same address it was given.

Obviously that happens in platforms without an IOMMU, but it also
happens in cases where an IOMMU exists but is in passthrough mode, and
it also happens in cases where an IOMMU exists somewhere in the system
but only translates for *other* devices.

In all cases, drivers must just use the DMA API and *it* is responsible
for doing the right thing.

> I don't have a problem with extending DMA API to address
> more usecases.

No, this isn't an extension. This is fixing a bug, on certain platforms
where the DMA API has currently done the wrong thing.

We have historically worked around that bug by introducing *another*
bug, which is not to *use* the DMA API in the virtio driver.

Sure, we can co-ordinate those two bug-fixes. But let's not talk about
them as anything other than bug-fixes.

> > Drivers use DMA API. No more talky.
> 
> Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
> for the vast majority of users, so I can't switch them over
> until the DMA API actually addresses all existing usecases.

That's still not your business; it's the platform's. And there are
hardware implementations of the virtio protocols on real PCI cards. And
we have the option of doing IOMMU translation for the virtio devices
even in a virtual machine. Just don't get involved.

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 14:32                   ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-28 14:32 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 2156 bytes --]

On Wed, 2015-10-28 at 16:22 +0200, Michael S. Tsirkin wrote:
> On Wed, Oct 28, 2015 at 11:13:29PM +0900, David Woodhouse wrote:
> > On Wed, 2015-10-28 at 16:05 +0200, Michael S. Tsirkin wrote:
> > > 
> > > Short answer - platforms need a way to discover, and express
> > > different
> > > security requirements of different devices.
> > 
> > Sure. PLATFORMS need that. Do not let it go anywhere near your
> > device
> > drivers. Including the virtio drivers.
> 
> But would there be any users of this outside the virtio subsystem?
> If no, maybe virtio core is a logical place to keep this.

Users of what? DMA API ops which basically do nothing? Sure — there are
*plenty* of cases where there isn't actually an IOMMU in active use and
the DMA API just returns the same address it was given.

Obviously that happens in platforms without an IOMMU, but it also
happens in cases where an IOMMU exists but is in passthrough mode, and
it also happens in cases where an IOMMU exists somewhere in the system
but only translates for *other* devices.

In all cases, drivers must just use the DMA API and *it* is responsible
for doing the right thing.

> I don't have a problem with extending DMA API to address
> more usecases.

No, this isn't an extension. This is fixing a bug, on certain platforms
where the DMA API has currently done the wrong thing.

We have historically worked around that bug by introducing *another*
bug, which is not to *use* the DMA API in the virtio driver.

Sure, we can co-ordinate those two bug-fixes. But let's not talk about
them as anything other than bug-fixes.

> > Drivers use DMA API. No more talky.
> 
> Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
> for the vast majority of users, so I can't switch them over
> until the DMA API actually addresses all existing usecases.

That's still not your business; it's the platform's. And there are
hardware implementations of the virtio protocols on real PCI cards. And
we have the option of doing IOMMU translation for the virtio devices
even in a virtual machine. Just don't get involved.

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 14:32                   ` David Woodhouse
  (?)
@ 2015-10-28 16:12                   ` Michael S. Tsirkin
  2015-10-28 22:51                       ` Andy Lutomirski
  -1 siblings, 1 reply; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 16:12 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, benh, KVM, Martin Schwidefsky, linux-s390,
	virtualization

On Wed, Oct 28, 2015 at 11:32:34PM +0900, David Woodhouse wrote:
> > I don't have a problem with extending DMA API to address
> > more usecases.
> 
> No, this isn't an extension. This is fixing a bug, on certain platforms
> where the DMA API has currently done the wrong thing.
> 
> We have historically worked around that bug by introducing *another*
> bug, which is not to *use* the DMA API in the virtio driver.
> 
> Sure, we can co-ordinate those two bug-fixes. But let's not talk about
> them as anything other than bug-fixes.

It was pretty practical not to use it. All virtio devices at the time
without exception bypassed the IOMMU, so it was a question of omitting a
couple of function calls in virtio versus hacking on DMA implementation
on multiple platforms. We have more policy options now, so I agree it's
time to revisit this.

But for me, the most important thing is that we do coordinate.

> > > Drivers use DMA API. No more talky.
> > 
> > Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
> > for the vast majority of users, so I can't switch them over
> > until the DMA API actually addresses all existing usecases.
> 
> That's still not your business; it's the platform's. And there are
> hardware implementations of the virtio protocols on real PCI cards. And
> we have the option of doing IOMMU translation for the virtio devices
> even in a virtual machine. Just don't get involved.
> 
> -- 
> dwmw2
> 
> 

I'm involved anyway, it's possible not to put all the code in the virtio
subsystem in guest though.  But I suspect we'll need to find a way for
non-linux drivers within guest to work correctly too, and they might
have trouble poking at things at the system level.  So possibly virtio
subsystem will have to tell platform "this device wants to bypass IOMMU"
and then DMA API does the right thing.

I'll look into this after my vacation ~1.5 weeks from now.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 14:32                   ` David Woodhouse
  (?)
  (?)
@ 2015-10-28 16:12                   ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-28 16:12 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Andy Lutomirski,
	Paolo Bonzini, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 11:32:34PM +0900, David Woodhouse wrote:
> > I don't have a problem with extending DMA API to address
> > more usecases.
> 
> No, this isn't an extension. This is fixing a bug, on certain platforms
> where the DMA API has currently done the wrong thing.
> 
> We have historically worked around that bug by introducing *another*
> bug, which is not to *use* the DMA API in the virtio driver.
> 
> Sure, we can co-ordinate those two bug-fixes. But let's not talk about
> them as anything other than bug-fixes.

It was pretty practical not to use it. All virtio devices at the time
without exception bypassed the IOMMU, so it was a question of omitting a
couple of function calls in virtio versus hacking on DMA implementation
on multiple platforms. We have more policy options now, so I agree it's
time to revisit this.

But for me, the most important thing is that we do coordinate.

> > > Drivers use DMA API. No more talky.
> > 
> > Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
> > for the vast majority of users, so I can't switch them over
> > until the DMA API actually addresses all existing usecases.
> 
> That's still not your business; it's the platform's. And there are
> hardware implementations of the virtio protocols on real PCI cards. And
> we have the option of doing IOMMU translation for the virtio devices
> even in a virtual machine. Just don't get involved.
> 
> -- 
> dwmw2
> 
> 

I'm involved anyway, it's possible not to put all the code in the virtio
subsystem in guest though.  But I suspect we'll need to find a way for
non-linux drivers within guest to work correctly too, and they might
have trouble poking at things at the system level.  So possibly virtio
subsystem will have to tell platform "this device wants to bypass IOMMU"
and then DMA API does the right thing.

I'll look into this after my vacation ~1.5 weeks from now.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 16:12                   ` Michael S. Tsirkin
@ 2015-10-28 22:51                       ` Andy Lutomirski
  0 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28 22:51 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: David Woodhouse, Christian Borntraeger, Andy Lutomirski,
	linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Wed, Oct 28, 2015 at 9:12 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Oct 28, 2015 at 11:32:34PM +0900, David Woodhouse wrote:
>> > I don't have a problem with extending DMA API to address
>> > more usecases.
>>
>> No, this isn't an extension. This is fixing a bug, on certain platforms
>> where the DMA API has currently done the wrong thing.
>>
>> We have historically worked around that bug by introducing *another*
>> bug, which is not to *use* the DMA API in the virtio driver.
>>
>> Sure, we can co-ordinate those two bug-fixes. But let's not talk about
>> them as anything other than bug-fixes.
>
> It was pretty practical not to use it. All virtio devices at the time
> without exception bypassed the IOMMU, so it was a question of omitting a
> couple of function calls in virtio versus hacking on DMA implementation
> on multiple platforms. We have more policy options now, so I agree it's
> time to revisit this.
>
> But for me, the most important thing is that we do coordinate.
>
>> > > Drivers use DMA API. No more talky.
>> >
>> > Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
>> > for the vast majority of users, so I can't switch them over
>> > until the DMA API actually addresses all existing usecases.
>>
>> That's still not your business; it's the platform's. And there are
>> hardware implementations of the virtio protocols on real PCI cards. And
>> we have the option of doing IOMMU translation for the virtio devices
>> even in a virtual machine. Just don't get involved.
>>
>> --
>> dwmw2
>>
>>
>
> I'm involved anyway, it's possible not to put all the code in the virtio
> subsystem in guest though.  But I suspect we'll need to find a way for
> non-linux drivers within guest to work correctly too, and they might
> have trouble poking at things at the system level.  So possibly virtio
> subsystem will have to tell platform "this device wants to bypass IOMMU"
> and then DMA API does the right thing.
>

After some discussion at KS, no one came up with an example where it's
necessary, and the patches to convert virtqueue to use the DMA API are
much nicer when they convert it unconditionally.

The two interesting cases we thought of were PPC and x86's emulated
Q35 IOMMU.  PPC will look in to architecting a devicetree-based way to
indicate passthrough status and will add quirks for the existing
virtio devices.  Everyone seems to agree that x86's emulated Q35 thing
is just buggy right now and should be taught to use the existing ACPI
mechanism for enumerating passthrough devices.

I'll send a new version of the series soon.

--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28 22:51                       ` Andy Lutomirski
  0 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28 22:51 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Linux Virtualization,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 9:12 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Oct 28, 2015 at 11:32:34PM +0900, David Woodhouse wrote:
>> > I don't have a problem with extending DMA API to address
>> > more usecases.
>>
>> No, this isn't an extension. This is fixing a bug, on certain platforms
>> where the DMA API has currently done the wrong thing.
>>
>> We have historically worked around that bug by introducing *another*
>> bug, which is not to *use* the DMA API in the virtio driver.
>>
>> Sure, we can co-ordinate those two bug-fixes. But let's not talk about
>> them as anything other than bug-fixes.
>
> It was pretty practical not to use it. All virtio devices at the time
> without exception bypassed the IOMMU, so it was a question of omitting a
> couple of function calls in virtio versus hacking on DMA implementation
> on multiple platforms. We have more policy options now, so I agree it's
> time to revisit this.
>
> But for me, the most important thing is that we do coordinate.
>
>> > > Drivers use DMA API. No more talky.
>> >
>> > Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
>> > for the vast majority of users, so I can't switch them over
>> > until the DMA API actually addresses all existing usecases.
>>
>> That's still not your business; it's the platform's. And there are
>> hardware implementations of the virtio protocols on real PCI cards. And
>> we have the option of doing IOMMU translation for the virtio devices
>> even in a virtual machine. Just don't get involved.
>>
>> --
>> dwmw2
>>
>>
>
> I'm involved anyway, it's possible not to put all the code in the virtio
> subsystem in guest though.  But I suspect we'll need to find a way for
> non-linux drivers within guest to work correctly too, and they might
> have trouble poking at things at the system level.  So possibly virtio
> subsystem will have to tell platform "this device wants to bypass IOMMU"
> and then DMA API does the right thing.
>

After some discussion at KS, no one came up with an example where it's
necessary, and the patches to convert virtqueue to use the DMA API are
much nicer when they convert it unconditionally.

The two interesting cases we thought of were PPC and x86's emulated
Q35 IOMMU.  PPC will look in to architecting a devicetree-based way to
indicate passthrough status and will add quirks for the existing
virtio devices.  Everyone seems to agree that x86's emulated Q35 thing
is just buggy right now and should be taught to use the existing ACPI
mechanism for enumerating passthrough devices.

I'll send a new version of the series soon.

--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28 22:51                       ` Andy Lutomirski
@ 2015-10-29  9:01                         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-29  9:01 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: David Woodhouse, Christian Borntraeger, Andy Lutomirski,
	linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Wed, Oct 28, 2015 at 03:51:58PM -0700, Andy Lutomirski wrote:
> On Wed, Oct 28, 2015 at 9:12 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Wed, Oct 28, 2015 at 11:32:34PM +0900, David Woodhouse wrote:
> >> > I don't have a problem with extending DMA API to address
> >> > more usecases.
> >>
> >> No, this isn't an extension. This is fixing a bug, on certain platforms
> >> where the DMA API has currently done the wrong thing.
> >>
> >> We have historically worked around that bug by introducing *another*
> >> bug, which is not to *use* the DMA API in the virtio driver.
> >>
> >> Sure, we can co-ordinate those two bug-fixes. But let's not talk about
> >> them as anything other than bug-fixes.
> >
> > It was pretty practical not to use it. All virtio devices at the time
> > without exception bypassed the IOMMU, so it was a question of omitting a
> > couple of function calls in virtio versus hacking on DMA implementation
> > on multiple platforms. We have more policy options now, so I agree it's
> > time to revisit this.
> >
> > But for me, the most important thing is that we do coordinate.
> >
> >> > > Drivers use DMA API. No more talky.
> >> >
> >> > Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
> >> > for the wast majority of users, so I can't switch them over
> >> > until the DMA API actually addresses all existing usecases.
> >>
> >> That's still not your business; it's the platform's. And there are
> >> hardware implementations of the virtio protocols on real PCI cards. And
> >> we have the option of doing IOMMU translation for the virtio devices
> >> even in a virtual machine. Just don't get involved.
> >>
> >> --
> >> dwmw2
> >>
> >>
> >
> > I'm involved anyway, it's possible not to put all the code in the virtio
> > subsystem in guest though.  But I suspect we'll need to find a way for
> > non-linux drivers within guest to work correctly too, and they might
> > have trouble poking at things at the system level.  So possibly virtio
> > subsystem will have to tell platform "this device wants to bypass IOMMU"
> > and then DMA API does the right thing.
> >
> 
> After some discussion at KS, no one came up with an example where it's
> necessary, and the patches to convert virtqueue to use the DMA API are
> much nicer when they convert it unconditionally.

It's very surprising that no one could.  I did above; I try again below.
Note: below discusses configuration *within guest*.

Example: you have a mix of assigned devices and virtio devices. You
don't trust your assigned device vendor not to corrupt your memory so
you want to limit the damage your assigned device can do to your guest,
so you use an IOMMU for that.  Thus existing iommu=pt within guest is out.

But you trust your hypervisor (you have no choice anyway),
and you don't want the overhead of tweaking IOMMU
on data path for virtio. Thus iommu=on is out too.



> The two interesting cases we thought of were PPC and x86's emulated
> Q35 IOMMU.  PPC will look into architecting a devicetree-based way to
> indicate passthrough status and will add quirks for the existing
> virtio devices.

Isn't this specified by the hypervisor? I don't think this is a good way
to do this: guest security should be up to guest.

> Everyone seems to agree that x86's emulated Q35 thing
> is just buggy right now and should be taught to use the existing ACPI
> mechanism for enumerating passthrough devices.

I'm not sure what ACPI has to do with it.
It's about a way for guest users to specify whether
they want to bypass an IOMMU for a given device.

> I'll send a new version of the series soon.
> 
> --Andy

By the way, a bunch of code is missing on the QEMU side
to make this useful:
1. virtio ignores the iommu
2. vhost user ignores the iommu
3. dataplane ignores the iommu
4. vhost-net ignores the iommu
5. VFIO ignores the iommu

I think so far I only saw patches for 1 above.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-29  9:01                         ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-10-29  9:01 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Linux Virtualization,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On Wed, Oct 28, 2015 at 03:51:58PM -0700, Andy Lutomirski wrote:
> On Wed, Oct 28, 2015 at 9:12 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> > On Wed, Oct 28, 2015 at 11:32:34PM +0900, David Woodhouse wrote:
> >> > I don't have a problem with extending DMA API to address
> >> > more usecases.
> >>
> >> No, this isn't an extension. This is fixing a bug, on certain platforms
> >> where the DMA API has currently done the wrong thing.
> >>
> >> We have historically worked around that bug by introducing *another*
> >> bug, which is not to *use* the DMA API in the virtio driver.
> >>
> >> Sure, we can co-ordinate those two bug-fixes. But let's not talk about
> >> them as anything other than bug-fixes.
> >
> > It was pretty practical not to use it. All virtio devices at the time
> > without exception bypassed the IOMMU, so it was a question of omitting a
> > couple of function calls in virtio versus hacking on DMA implementation
> > on multiple platforms. We have more policy options now, so I agree it's
> > time to revisit this.
> >
> > But for me, the most important thing is that we do coordinate.
> >
> >> > > Drivers use DMA API. No more talky.
> >> >
> >> > Well for virtio they don't ATM. And 1:1 mapping makes perfect sense
> >> > for the vast majority of users, so I can't switch them over
> >> > until the DMA API actually addresses all existing usecases.
> >>
> >> That's still not your business; it's the platform's. And there are
> >> hardware implementations of the virtio protocols on real PCI cards. And
> >> we have the option of doing IOMMU translation for the virtio devices
> >> even in a virtual machine. Just don't get involved.
> >>
> >> --
> >> dwmw2
> >>
> >>
> >
> > I'm involved anyway, it's possible not to put all the code in the virtio
> > subsystem in guest though.  But I suspect we'll need to find a way for
> > non-linux drivers within guest to work correctly too, and they might
> > have trouble poking at things at the system level.  So possibly virtio
> > subsystem will have to tell platform "this device wants to bypass IOMMU"
> > and then DMA API does the right thing.
> >
> 
> After some discussion at KS, no one came up with an example where it's
> necessary, and the patches to convert virtqueue to use the DMA API are
> much nicer when they convert it unconditionally.

It's very surprising that no one could.  I did above; I try again below.
Note: below discusses configuration *within guest*.

Example: you have a mix of assigned devices and virtio devices. You
don't trust your assigned device vendor not to corrupt your memory so
you want to limit the damage your assigned device can do to your guest,
so you use an IOMMU for that.  Thus existing iommu=pt within guest is out.

But you trust your hypervisor (you have no choice anyway),
and you don't want the overhead of tweaking IOMMU
on data path for virtio. Thus iommu=on is out too.



> The two interesting cases we thought of were PPC and x86's emulated
> Q35 IOMMU.  PPC will look into architecting a devicetree-based way to
> indicate passthrough status and will add quirks for the existing
> virtio devices.

Isn't this specified by the hypervisor? I don't think this is a good way
to do this: guest security should be up to guest.

> Everyone seems to agree that x86's emulated Q35 thing
> is just buggy right now and should be taught to use the existing ACPI
> mechanism for enumerating passthrough devices.

I'm not sure what ACPI has to do with it.
It's about a way for guest users to specify whether
they want to bypass an IOMMU for a given device.

> I'll send a new version of the series soon.
> 
> --Andy

By the way, a bunch of code is missing on the QEMU side
to make this useful:
1. virtio ignores the iommu
2. vhost user ignores the iommu
3. dataplane ignores the iommu
4. vhost-net ignores the iommu
5. VFIO ignores the iommu

I think so far I only saw patches for 1 above.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29  9:01                         ` Michael S. Tsirkin
@ 2015-10-29 16:18                           ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-29 16:18 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

[-- Attachment #1: Type: text/plain, Size: 1026 bytes --]

On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> 
> Example: you have a mix of assigned devices and virtio devices. You
> don't trust your assigned device vendor not to corrupt your memory so
> you want to limit the damage your assigned device can do to your
> guest,
> so you use an IOMMU for that.  Thus existing iommu=pt within guest is
> out.
> 
> But you trust your hypervisor (you have no choice anyway),
> and you don't want the overhead of tweaking IOMMU
> on data path for virtio. Thus iommu=on is out too.

That's not at all special for virtio or guest VMs. Even with real
hardware, we might want performance from *some* devices, and security
from others. See the DMA_ATTR_IOMMU_BYPASS which is currently being
discussed.

But of course the easy answer in *your* case is just to ask the
hypervisor not to put the virtio devices behind an IOMMU at all. Which
we were planning to remain the default behaviour.

In all cases, the DMA API shall do the right thing.

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-29 16:18                           ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-29 16:18 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Linux Virtualization,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 1026 bytes --]

On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> 
> Example: you have a mix of assigned devices and virtio devices. You
> don't trust your assigned device vendor not to corrupt your memory so
> you want to limit the damage your assigned device can do to your
> guest,
> so you use an IOMMU for that.  Thus existing iommu=pt within guest is
> out.
> 
> But you trust your hypervisor (you have no choice anyway),
> and you don't want the overhead of tweaking IOMMU
> on data path for virtio. Thus iommu=on is out too.

That's not at all special for virtio or guest VMs. Even with real
hardware, we might want performance from *some* devices, and security
from others. See the DMA_ATTR_IOMMU_BYPASS which is currently being
discussed.

But of course the easy answer in *your* case is just to ask the
hypervisor not to put the virtio devices behind an IOMMU at all. Which
we were planning to remain the default behaviour.

In all cases, the DMA API shall do the right thing.

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29  9:01                         ` Michael S. Tsirkin
                                           ` (2 preceding siblings ...)
  (?)
@ 2015-10-30 15:16                         ` Joerg Roedel
  2015-11-11  9:11                             ` Michael S. Tsirkin
  -1 siblings, 1 reply; 110+ messages in thread
From: Joerg Roedel @ 2015-10-30 15:16 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Andy Lutomirski, David Woodhouse, Christian Borntraeger,
	Andy Lutomirski, linux-kernel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Thu, Oct 29, 2015 at 11:01:41AM +0200, Michael S. Tsirkin wrote:
> Example: you have a mix of assigned devices and virtio devices. You
> don't trust your assigned device vendor not to corrupt your memory so
> you want to limit the damage your assigned device can do to your guest,
> so you use an IOMMU for that.  Thus existing iommu=pt within guest is out.
> 
> But you trust your hypervisor (you have no choice anyway),
> and you don't want the overhead of tweaking IOMMU
> on data path for virtio. Thus iommu=on is out too.

IOMMUs on x86 usually come with an ACPI table that describes which
IOMMUs are in the system and which devices they translate. So you can
easily describe all devices there that are not behind an IOMMU.

The ACPI table is built by the BIOS, and the platform initialization code
sets the device dma_ops accordingly. If the BIOS provides wrong
information in the ACPI table this is a platform bug.

> I'm not sure what ACPI has to do with it.  It's about a way for guest
> users to specify whether they want to bypass an IOMMU for a given
> device.

We have no way yet to request passthrough-mode per-device from the IOMMU
drivers, but that can easily be added. But as I see it:

> By the way, a bunch of code is missing on the QEMU side
> to make this useful:
> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu

Qemu does not implement IOMMU translation for virtio devices anyway
(which is fine), so it just should tell the guest so in the ACPI table
built to describe the emulated IOMMU.


	Joerg


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29  9:01                         ` Michael S. Tsirkin
  (?)
  (?)
@ 2015-10-30 15:16                         ` Joerg Roedel
  -1 siblings, 0 replies; 110+ messages in thread
From: Joerg Roedel @ 2015-10-30 15:16 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On Thu, Oct 29, 2015 at 11:01:41AM +0200, Michael S. Tsirkin wrote:
> Example: you have a mix of assigned devices and virtio devices. You
> don't trust your assigned device vendor not to corrupt your memory so
> you want to limit the damage your assigned device can do to your guest,
> so you use an IOMMU for that.  Thus existing iommu=pt within guest is out.
> 
> But you trust your hypervisor (you have no choice anyway),
> and you don't want the overhead of tweaking IOMMU
> on data path for virtio. Thus iommu=on is out too.

IOMMUs on x86 usually come with an ACPI table that describes which
IOMMUs are in the system and which devices they translate. So you can
easily describe all devices there that are not behind an IOMMU.

The ACPI table is built by the BIOS, and the platform initialization code
sets the device dma_ops accordingly. If the BIOS provides wrong
information in the ACPI table this is a platform bug.

> I'm not sure what ACPI has to do with it.  It's about a way for guest
> users to specify whether they want to bypass an IOMMU for a given
> device.

We have no way yet to request passthrough-mode per-device from the IOMMU
drivers, but that can easily be added. But as I see it:

> By the way, a bunch of code is missing on the QEMU side
> to make this useful:
> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu

Qemu does not implement IOMMU translation for virtio devices anyway
(which is fine), so it just should tell the guest so in the ACPI table
built to describe the emulated IOMMU.


	Joerg

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29  9:01                         ` Michael S. Tsirkin
@ 2015-10-30 16:54                           ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-30 16:54 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: Christian Borntraeger, Andy Lutomirski, linux-kernel,
	Joerg Roedel, Cornelia Huck, Sebastian Ott, Paolo Bonzini,
	Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

[-- Attachment #1: Type: text/plain, Size: 2882 bytes --]

(Sorry, missed part of this before).

On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> Isn't this specified by the hypervisor? I don't think this is a good
> way to do this: guest security should be up to guest.

And it is. When the guest sees an IOMMU, it can choose to use it, or
choose not to (or choose to put it in passthrough mode). But as Jörg
says, we don't have a way for an individual  device driver to *request*
passthrough mode or not yet; the choice is made by the core IOMMU code
(iommu=pt on the command line) — or by the platform simply stating that
a given device isn't *covered* by an IOMMU, if that is indeed the case.

In *no* circumstance is it sane for a device driver just to "opt out"
of using the correct DMA API function calls, and expect that to
*magically* cause the IOMMU to be bypassed.

> > Everyone seems to agree that x86's emulated Q35 thing
> > is just buggy right now and should be taught to use the existing ACPI
> > mechanism for enumerating passthrough devices.
> 
> I'm not sure what ACPI has to do with it.
> It's about a way for guest users to specify whether
> they want to bypass an IOMMU for a given device.

No, it absolutely isn't. You might want that — and see the discussion
about DMA_ATTR_IOMMU_BYPASS if you do. But that is *utterly* irrelevant
to *this* discussion, in which you seem to be advocating that the
virtio drivers should remain buggy by just unilaterally not using the
DMA API.

> By the way, a bunch of code is missing on the QEMU side
> to make this useful:
> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu

No, those things are not useful for fixing the virtio driver bug under
discussion here. All we need to do is make the virtio drivers correctly
use the DMA API. They should never have passed review and been accepted
into the Linux kernel without that.

All we need to do first is make sure that the bug we have in the
PowerPC IOMMU code (and potentially ARM and/or SPARC?) is fixed, and
that it doesn't attempt to use an IOMMU that doesn't exist. And ensure
that the virtualised IOMMU on qemu/x86 isn't lying and claiming that it
translates for the virtio devices when it doesn't.

There are other things we might want to do — like fixing the IOMMU that
qemu can emulate, and actually making it work with real assigned
devices (currently it's totally hosed because it doesn't handle that
case at all). And potentially making the virtualised IOMMU actually
*do* translation for virtio devices (as opposed to just admitting
correctly that it doesn't). But those aren't strictly relevant here,
yet.

It's not clear what specific uses of the IOMMU you had in mind in your
above list — could you elucidate?

-- 
dwmw2


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-30 16:54                           ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-10-30 16:54 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Linux Virtualization,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 2882 bytes --]

(Sorry, missed part of this before).

On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> Isn't this specified by the hypervisor? I don't think this is a good
> way to do this: guest security should be up to guest.

And it is. When the guest sees an IOMMU, it can choose to use it, or
choose not to (or choose to put it in passthrough mode). But as Jörg
says, we don't have a way for an individual  device driver to *request*
passthrough mode or not yet; the choice is made by the core IOMMU code
(iommu=pt on the command line) — or by the platform simply stating that
a given device isn't *covered* by an IOMMU, if that is indeed the case.

In *no* circumstance is it sane for a device driver just to "opt out"
of using the correct DMA API function calls, and expect that to
*magically* cause the IOMMU to be bypassed.

> > Everyone seems to agree that x86's emulated Q35 thing
> > is just buggy right now and should be taught to use the existing ACPI
> > mechanism for enumerating passthrough devices.
> 
> I'm not sure what ACPI has to do with it.
> It's about a way for guest users to specify whether
> they want to bypass an IOMMU for a given device.

No, it absolutely isn't. You might want that — and see the discussion
about DMA_ATTR_IOMMU_BYPASS if you do. But that is *utterly* irrelevant
to *this* discussion, in which you seem to be advocating that the
virtio drivers should remain buggy by just unilaterally not using the
DMA API.

> By the way, a bunch of code is missing on the QEMU side
> to make this useful:
> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu

No, those things are not useful for fixing the virtio driver bug under
discussion here. All we need to do is make the virtio drivers correctly
use the DMA API. They should never have passed review and been accepted
into the Linux kernel without that.

All we need to do first is make sure that the bug we have in the
PowerPC IOMMU code (and potentially ARM and/or SPARC?) is fixed, and
that it doesn't attempt to use an IOMMU that doesn't exist. And ensure
that the virtualised IOMMU on qemu/x86 isn't lying and claiming that it
translates for the virtio devices when it doesn't.

There are other things we might want to do — like fixing the IOMMU that
qemu can emulate, and actually making it work with real assigned
devices (currently it's totally hosed because it doesn't handle that
case at all). And potentially making the virtualised IOMMU actually
*do* translation for virtio devices (as opposed to just admitting
correctly that it doesn't). But those aren't strictly relevant here,
yet.

It's not clear what specific uses of the IOMMU you had in mind in your
above list — could you elucidate?

-- 
dwmw2


[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29  9:01                         ` Michael S. Tsirkin
                                           ` (5 preceding siblings ...)
  (?)
@ 2015-11-03 10:24                         ` Paolo Bonzini
  -1 siblings, 0 replies; 110+ messages in thread
From: Paolo Bonzini @ 2015-11-03 10:24 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: David Woodhouse, Christian Borntraeger, Andy Lutomirski,
	linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization



On 29/10/2015 10:01, Michael S. Tsirkin wrote:
> > Everyone seems to agree that x86's emulated Q35 thing
> > is just buggy right now and should be taught to use the existing ACPI
> > mechanism for enumerating passthrough devices.
> 
> I'm not sure what ACPI has to do with it.
> It's about a way for guest users to specify whether
> they want to bypass an IOMMU for a given device.

It's not configured in the guest, it's configured _when starting_ the
guest (e.g. -device some-pci-device,iommu-bypass=on) and it is reflected
in the DMAR table or the device tree.

The default for virtio and VFIO is to bypass the IOMMU.  Changing the
default can be supported (virtio) or not (VFIO, vhost-user).  Hotplug
needs to check whether the parent bridge has the same setting that the
user desires for the new device.

> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu
> 
> I think so far I only saw patches for 1 above.

1 and 3 are easy.  For 2 and 5 you can simply forbid configurations with
vhost-user/VFIO behind an IOMMU.  For 4 QEMU can simply not activate
vhost-net and use the userspace fallback.

However, IOMMU support in QEMU is experimental.  We can do things a step
at a time.

Paolo

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29  9:01                         ` Michael S. Tsirkin
                                           ` (4 preceding siblings ...)
  (?)
@ 2015-11-03 10:24                         ` Paolo Bonzini
  -1 siblings, 0 replies; 110+ messages in thread
From: Paolo Bonzini @ 2015-11-03 10:24 UTC (permalink / raw)
  To: Michael S. Tsirkin, Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Linux Virtualization,
	Christian Borntraeger, Andy Lutomirski, David Woodhouse,
	Christoph Hellwig, Martin Schwidefsky



On 29/10/2015 10:01, Michael S. Tsirkin wrote:
> > Everyone seems to agree that x86's emulated Q35 thing
> > is just buggy right now and should be taught to use the existing ACPI
> > mechanism for enumerating passthrough devices.
> 
> I'm not sure what ACPI has to do with it.
> It's about a way for guest users to specify whether
> they want to bypass an IOMMU for a given device.

It's not configured in the guest, it's configured _when starting_ the
guest (e.g. -device some-pci-device,iommu-bypass=on) and it is reflected
in the DMAR table or the device tree.

The default for virtio and VFIO is to bypass the IOMMU.  Changing the
default can be supported (virtio) or not (VFIO, vhost-user).  Hotplug
needs to check whether the parent bridge has the same setting that the
user desires for the new device.

> 1. virtio ignores the iommu
> 2. vhost user ignores the iommu
> 3. dataplane ignores the iommu
> 4. vhost-net ignores the iommu
> 5. VFIO ignores the iommu
> 
> I think so far I only saw patches for 1 above.

1 and 3 are easy.  For 2 and 5 you can simply forbid configurations with
vhost-user/VFIO behind an IOMMU.  For 4 QEMU can simply not activate
vhost-net and use the userspace fallback.

However, IOMMU support in QEMU is experimental.  We can do things a step
at a time.

Paolo

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-29 16:18                           ` David Woodhouse
@ 2015-11-08 10:37                             ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-08 10:37 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andy Lutomirski, Christian Borntraeger, Andy Lutomirski,
	linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Thu, Oct 29, 2015 at 05:18:56PM +0100, David Woodhouse wrote:
> On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> > 
> > Example: you have a mix of assigned devices and virtio devices. You
> > don't trust your assigned device vendor not to corrupt your memory so
> > you want to limit the damage your assigned device can do to your
> > guest,
> > so you use an IOMMU for that.  Thus existing iommu=pt within guest is
> > out.
> > 
> > But you trust your hypervisor (you have no choice anyway),
> > and you don't want the overhead of tweaking IOMMU
> > on data path for virtio. Thus iommu=on is out too.
> 
> That's not at all special for virtio or guest VMs. Even with real
> hardware, we might want performance from *some* devices, and security
> from others. See the DMA_ATTR_IOMMU_BYPASS which is currently being
> discussed.

Right. So let's wait for that discussion to play out?

> But of course the easy answer in *your* case it just to ask the
> hypervisor not to put the virtio devices behind an IOMMU at all. Which
> we were planning to remain the default behaviour.

One can't do this for x86 ATM, can one?

> In all cases, the DMA API shall do the right thing.

I have no problem with that. For example, can we teach
the DMA API on intel x86 to use PT for virtio by default?
That would allow merging Andy's patches with
full compatibility with old guests and hosts.

> -- 
> dwmw2
> 
> 



-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-08 10:37                             ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-08 10:37 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig, Martin Schwidefsky

On Thu, Oct 29, 2015 at 05:18:56PM +0100, David Woodhouse wrote:
> On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> > 
> > Example: you have a mix of assigned devices and virtio devices. You
> > don't trust your assigned device vendor not to corrupt your memory so
> > you want to limit the damage your assigned device can do to your
> > guest,
> > so you use an IOMMU for that.  Thus existing iommu=pt within guest is
> > out.
> > 
> > But you trust your hypervisor (you have no choice anyway),
> > and you don't want the overhead of tweaking IOMMU
> > on data path for virtio. Thus iommu=on is out too.
> 
> That's not at all special for virtio or guest VMs. Even with real
> hardware, we might want performance from *some* devices, and security
> from others. See the DMA_ATTR_IOMMU_BYPASS which is currently being
> discussed.

Right. So let's wait for that discussion to play out?

> But of course the easy answer in *your* case it just to ask the
> hypervisor not to put the virtio devices behind an IOMMU at all. Which
> we were planning to remain the default behaviour.

One can't do this for x86 ATM, can one?

> In all cases, the DMA API shall do the right thing.

I have no problem with that. For example, can we teach
the DMA API on intel x86 to use PT for virtio by default?
That would allow merging Andy's patches with
full compatibility with old guests and hosts.

> -- 
> dwmw2
> 
> 



-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-08 10:37                             ` Michael S. Tsirkin
@ 2015-11-08 11:49                               ` Joerg Roedel
  -1 siblings, 0 replies; 110+ messages in thread
From: Joerg Roedel @ 2015-11-08 11:49 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: David Woodhouse, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, linux-kernel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> I have no problem with that. For example, can we teach
> the DMA API on intel x86 to use PT for virtio by default?
> That would allow merging Andy's patches with
> full compatibility with old guests and hosts.

Well, the only incompatibility comes from an experimental qemu feature,
more explicitly from a bug in that feature's implementation. So why
should we work around that in the kernel? I think it is not too hard to
fix qemu to generate a correct DMAR table which excludes the virtio
devices from iommu translation.


	Joerg


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-08 11:49                               ` Joerg Roedel
  0 siblings, 0 replies; 110+ messages in thread
From: Joerg Roedel @ 2015-11-08 11:49 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> I have no problem with that. For example, can we teach
> the DMA API on intel x86 to use PT for virtio by default?
> That would allow merging Andy's patches with
> full compatibility with old guests and hosts.

Well, the only incompatibility comes from an experimental qemu feature,
more explicitly from a bug in that feature's implementation. So why
should we work around that in the kernel? I think it is not too hard to
fix qemu to generate a correct DMAR table which excludes the virtio
devices from iommu translation.


	Joerg

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-08 10:37                             ` Michael S. Tsirkin
@ 2015-11-08 12:00                               ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-08 12:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Andy Lutomirski, Christian Borntraeger, Andy Lutomirski,
	linux-kernel, Joerg Roedel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

[-- Attachment #1: Type: text/plain, Size: 2813 bytes --]

On Sun, 2015-11-08 at 12:37 +0200, Michael S. Tsirkin wrote:
> On Thu, Oct 29, 2015 at 05:18:56PM +0100, David Woodhouse wrote:
> > On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> > > 
> > > But you trust your hypervisor (you have no choice anyway),
> > > and you don't want the overhead of tweaking IOMMU
> > > on data path for virtio. Thus iommu=on is out too.
> > 
> > That's not at all special for virtio or guest VMs. Even with real
> > hardware, we might want performance from *some* devices, and security
> > from others. See the DMA_ATTR_IOMMU_BYPASS which is currently being
> > discussed.
> 
> Right. So let's wait for that discussion to play out?

That discussion is purely about a requested optimisation. This one is
about correctness.

> > But of course the easy answer in *your* case it just to ask the
> > hypervisor not to put the virtio devices behind an IOMMU at all. Which
> > we were planning to remain the default behaviour.
> 
> One can't do this for x86 ATM, can one?

The converse is true, in fact — currently, there's no way to tell 
qemu-system-x86 that you *do* want it to put the virtio devices behind
the emulated IOMMU, as it has no support for that.

Which is a bit sad really, since the DMAR table that qemu advertises to
the guest does *tell* the guest that the virtio devices are behind the
emulated IOMMU.

In the short term, we'll be fixing the DMAR table, and still not
actually making it possible to put the virtio devices behind the
emulated IOMMU.

In the fullness of time, however, we *will* be fixing the qemu IOMMU
code so that it can translate for virtio devices — and for assigned
physical devices, which I believe are also broken at the moment when
qemu emulates an IOMMU.

> > In all cases, the DMA API shall do the right thing.
> 
> I have no problem with that. For example, can we teach
> the DMA API on intel x86 to use PT for virtio by default?
> That would allow merging Andy's patches with
> full compatibility with old guests and hosts.

A quirk so that we *notice* the bug in the existing qemu DMAR table,
and disbelieve what it says about the virtio devices?

Alternatively, we could just recognise that the emulated IOMMU support
in qemu is an experimental feature and doesn't work right, yet. Are
people really using it in anger?

If we do want to do a quirk, then we should make it get it right for
assigned devices too.

To start with, do you want to try to express the criteria for "the DMAR
table lies and <this> device is actually untranslated" in a form of
prose which could reasonably be translated into code?

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-08 12:00                               ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-08 12:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig, Martin Schwidefsky


[-- Attachment #1.1: Type: text/plain, Size: 2813 bytes --]

On Sun, 2015-11-08 at 12:37 +0200, Michael S. Tsirkin wrote:
> On Thu, Oct 29, 2015 at 05:18:56PM +0100, David Woodhouse wrote:
> > On Thu, 2015-10-29 at 11:01 +0200, Michael S. Tsirkin wrote:
> > > 
> > > But you trust your hypervisor (you have no choice anyway),
> > > and you don't want the overhead of tweaking IOMMU
> > > on data path for virtio. Thus iommu=on is out too.
> > 
> > That's not at all special for virtio or guest VMs. Even with real
> > hardware, we might want performance from *some* devices, and security
> > from others. See the DMA_ATTR_IOMMU_BYPASS which is currently being
> > discussed.
> 
> Right. So let's wait for that discussion to play out?

That discussion is purely about a requested optimisation. This one is
about correctness.

> > But of course the easy answer in *your* case it just to ask the
> > hypervisor not to put the virtio devices behind an IOMMU at all. Which
> > we were planning to remain the default behaviour.
> 
> One can't do this for x86 ATM, can one?

The converse is true, in fact — currently, there's no way to tell 
qemu-system-x86 that you *do* want it to put the virtio devices behind
the emulated IOMMU, as it has no support for that.

Which is a bit sad really, since the DMAR table that qemu advertises to
the guest does *tell* the guest that the virtio devices are behind the
emulated IOMMU.

In the short term, we'll be fixing the DMAR table, and still not
actually making it possible to put the virtio devices behind the
emulated IOMMU.

In the fullness of time, however, we *will* be fixing the qemu IOMMU
code so that it can translate for virtio devices — and for assigned
physical devices, which I believe are also broken at the moment when
qemu emulates an IOMMU.

> > In all cases, the DMA API shall do the right thing.
> 
> I have no problem with that. For example, can we teach
> the DMA API on intel x86 to use PT for virtio by default?
> That would allow merging Andy's patches with
> full compatibility with old guests and hosts.

A quirk so that we *notice* the bug in the existing qemu DMAR table,
and disbelieve what it says about the virtio devices?

Alternatively, we could just recognise that the emulated IOMMU support
in qemu is an experimental feature and doesn't work right, yet. Are
people really using it in anger?

If we do want to do a quirk, then we should make it get it right for
assigned devices too.

To start with, do you want to try to express the criteria for "the DMAR
table lies and <this> device is actually untranslated" in a form of
prose which could reasonably be translated into code?

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation


[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-08 11:49                               ` Joerg Roedel
@ 2015-11-10 15:02                                 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-10 15:02 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: David Woodhouse, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, linux-kernel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
> On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> > I have no problem with that. For example, can we teach
> > the DMA API on intel x86 to use PT for virtio by default?
> > That would allow merging Andy's patches with
> > full compatibility with old guests and hosts.
> 
> Well, the only incompatibility comes from an experimental qemu feature,
> more explicitly from a bug in that features implementation. So why
> should we work around that in the kernel? I think it is not too hard to
> fix qemu to generate a correct DMAR table which excludes the virtio
> devices from iommu translation.
> 
> 
> 	Joerg

It's not that easy - you'd have to dedicate some buses
for iommu bypass, and teach management tools to only put
virtio there - but it's possible.

This will absolutely address guests that don't need to set up IOMMU for
virtio devices, and virtio that bypasses the IOMMU.

But the problem is that we do want to *allow* guests
to set up IOMMU for virtio devices.
In that case, these are two other usecases:

A- monolithic virtio within QEMU:
	iommu only needed for VFIO ->
	guest should always use iommu=pt
        iommu=on works but is just useless overhead.

B- modular out of process virtio outside QEMU:
	iommu needed for VFIO or kernel driver ->
	guest should use iommu=pt or iommu=on
	depending on security/performance requirements

Note that there could easily be a mix of these in the same system.

So for these cases we do need QEMU to specify to guest that IOMMU covers
the virtio devices.  Also, once one does this, the default on linux is
iommu=on and not pt, which works but ATM is very slow.

This poses three problems:

1. How do we address the different needs of A and B?
   One way would be for virtio to pass the information to guest
   using some virtio specific way, and have drivers
   specify what kind of DMA access they want.

2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
   use the more sensible iommu=pt.

3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
   Creating different hypervisor configurations depending on guest is very nasty.
   Again, one way would be some virtio specific interface.

I'd rather we figured the answers to this before merging Andy's patches
because I'm concerned that instead of 1 broken configuration
(virtio always bypasses IOMMU) we'll get two bad configurations
(in the second one, virtio uses the slow default with no
gain in security).

Suggestions welcome.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-10 15:02                                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-10 15:02 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
> On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> > I have no problem with that. For example, can we teach
> > the DMA API on intel x86 to use PT for virtio by default?
> > That would allow merging Andy's patches with
> > full compatibility with old guests and hosts.
> 
> Well, the only incompatibility comes from an experimental qemu feature,
> more explicitly from a bug in that features implementation. So why
> should we work around that in the kernel? I think it is not too hard to
> fix qemu to generate a correct DMAR table which excludes the virtio
> devices from iommu translation.
> 
> 
> 	Joerg

It's not that easy - you'd have to dedicate some buses
for iommu bypass, and teach management tools to only put
virtio there - but it's possible.

This will absolutely address guests that don't need to set up IOMMU for
virtio devices, and virtio that bypasses the IOMMU.

But the problem is that we do want to *allow* guests
to set up IOMMU for virtio devices.
In that case, these are two other usecases:

A- monolithic virtio within QEMU:
	iommu only needed for VFIO ->
	guest should always use iommu=pt
        iommu=on works but is just useless overhead.

B- modular out of process virtio outside QEMU:
	iommu needed for VFIO or kernel driver ->
	guest should use iommu=pt or iommu=on
	depending on security/performance requirements

Note that there could easily be a mix of these in the same system.

So for these cases we do need QEMU to specify to guest that IOMMU covers
the virtio devices.  Also, once one does this, the default on linux is
iommu=on and not pt, which works but ATM is very slow.

This poses three problems:

1. How do we address the different needs of A and B?
   One way would be for virtio to pass the information to guest
   using some virtio specific way, and have drivers
   specify what kind of DMA access they want.

2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
   use the more sensible iommu=pt.

3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
   Creating different hypervisor configurations depending on guest is very nasty.
   Again, one way would be some virtio specific interface.

I'd rather we figured the answers to this before merging Andy's patches
because I'm concerned that instead of 1 broken configuration
(virtio always bypasses IOMMU) we'll get two bad configurations
(in the second one, virtio uses the slow default with no
gain in security).

Suggestions welcome.

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-10 15:02                                 ` Michael S. Tsirkin
@ 2015-11-10 18:54                                   ` Andy Lutomirski
  -1 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-11-10 18:54 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	David Woodhouse, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Christoph Hellwig,
	Linux Virtualization, linux-kernel, KVM

On Nov 10, 2015 7:02 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
> > On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> > > I have no problem with that. For example, can we teach
> > > the DMA API on intel x86 to use PT for virtio by default?
> > > That would allow merging Andy's patches with
> > > full compatibility with old guests and hosts.
> >
> > Well, the only incompatibility comes from an experimental qemu feature,
> > more explicitly from a bug in that features implementation. So why
> > should we work around that in the kernel? I think it is not too hard to
> > fix qemu to generate a correct DMAR table which excludes the virtio
> > devices from iommu translation.
> >
> >
> >       Joerg
>
> It's not that easy - you'd have to dedicate some buses
> for iommu bypass, and teach management tools to only put
> virtio there - but it's possible.
>
> This will absolutely address guests that don't need to set up IOMMU for
> virtio devices, and virtio that bypasses the IOMMU.
>
> But the problem is that we do want to *allow* guests
> to set up IOMMU for virtio devices.
> In that case, these are two other usecases:
>
> A- monolitic virtio within QEMU:
>         iommu only needed for VFIO ->
>         guest should always use iommu=pt
>         iommu=on works but is just useless overhead.
>
> B- modular out of process virtio outside QEMU:
>         iommu needed for VFIO or kernel driver ->
>         guest should use iommu=pt or iommu=on
>         depending on security/performance requirements
>
> Note that there could easily be a mix of these in the same system.
>
> So for these cases we do need QEMU to specify to guest that IOMMU covers
> the virtio devices.  Also, once one does this, the default on linux is
> iommu=on and not pt, which works but ATM is very slow.
>
> This poses three problems:
>
> 1. How do we address the different needs of A and B?
>    One way would be for virtio to pass the information to guest
>    using some virtio specific way, and have drivers
>    specify what kind of DMA access they want.
>
> 2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
>    use the more sensible iommu=pt.
>
> 3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
>    Creating different hypervisor configurations depending on guest is very nasty.
>    Again, one way would be some virtio specific interface.
>
> I'd rather we figured the answers to this before merging Andy's patches
> because I'm concerned that instead of 1 broken configuration
> (virtio always bypasses IOMMU) we'll get two bad configurations
> (in the second one, virtio uses the slow default with no
> gain in security).
>
> Suggestions wellcome.

I think there's still no downside of using my patches, even on x86.

Old kernels on new QEMU work unless IOMMU is enabled on the host.  I
think that's the best we can possibly do.

New kernels work at full speed on old QEMU.

New kernels with new QEMU and iommu enabled work slower.  Even newer
kernels with default passthrough work at full speed, and there's no
obvious downside to the existence of kernels with just my patches.

--Andy

>
> --
> MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-10 18:54                                   ` Andy Lutomirski
  0 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-11-10 18:54 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini, David Woodhouse,
	Christoph Hellwig

On Nov 10, 2015 7:02 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
> > On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> > > I have no problem with that. For example, can we teach
> > > the DMA API on intel x86 to use PT for virtio by default?
> > > That would allow merging Andy's patches with
> > > full compatibility with old guests and hosts.
> >
> > Well, the only incompatibility comes from an experimental qemu feature,
> > more explicitly from a bug in that features implementation. So why
> > should we work around that in the kernel? I think it is not too hard to
> > fix qemu to generate a correct DMAR table which excludes the virtio
> > devices from iommu translation.
> >
> >
> >       Joerg
>
> It's not that easy - you'd have to dedicate some buses
> for iommu bypass, and teach management tools to only put
> virtio there - but it's possible.
>
> This will absolutely address guests that don't need to set up IOMMU for
> virtio devices, and virtio that bypasses the IOMMU.
>
> But the problem is that we do want to *allow* guests
> to set up IOMMU for virtio devices.
> In that case, these are two other usecases:
>
> A- monolitic virtio within QEMU:
>         iommu only needed for VFIO ->
>         guest should always use iommu=pt
>         iommu=on works but is just useless overhead.
>
> B- modular out of process virtio outside QEMU:
>         iommu needed for VFIO or kernel driver ->
>         guest should use iommu=pt or iommu=on
>         depending on security/performance requirements
>
> Note that there could easily be a mix of these in the same system.
>
> So for these cases we do need QEMU to specify to guest that IOMMU covers
> the virtio devices.  Also, once one does this, the default on linux is
> iommu=on and not pt, which works but ATM is very slow.
>
> This poses three problems:
>
> 1. How do we address the different needs of A and B?
>    One way would be for virtio to pass the information to guest
>    using some virtio specific way, and have drivers
>    specify what kind of DMA access they want.
>
> 2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
>    use the more sensible iommu=pt.
>
> 3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
>    Creating different hypervisor configurations depending on guest is very nasty.
>    Again, one way would be some virtio specific interface.
>
> I'd rather we figured the answers to this before merging Andy's patches
> because I'm concerned that instead of 1 broken configuration
> (virtio always bypasses IOMMU) we'll get two bad configurations
> (in the second one, virtio uses the slow default with no
> gain in security).
>
> Suggestions wellcome.

I think there's still no downside of using my patches, even on x86.

Old kernels on new QEMU work unless IOMMU is enabled on the host.  I
think that's the best we can possibly do.

New kernels work at full speed on old QEMU.

New kernels with new QEMU and iommu enabled work slower.  Even newer
kernels with default passthrough work at full speed, and there's no
obvious downside to the existence of kernels with just my patches.

--Andy

>
> --
> MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-30 15:16                         ` Joerg Roedel
@ 2015-11-11  9:11                             ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11  9:11 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: Andy Lutomirski, David Woodhouse, Christian Borntraeger,
	Andy Lutomirski, linux-kernel, Cornelia Huck, Sebastian Ott,
	Paolo Bonzini, Christoph Hellwig, Benjamin Herrenschmidt, KVM,
	Martin Schwidefsky, linux-s390, Linux Virtualization

On Sat, Oct 31, 2015 at 12:16:12AM +0900, Joerg Roedel wrote:
> On Thu, Oct 29, 2015 at 11:01:41AM +0200, Michael S. Tsirkin wrote:
> > Example: you have a mix of assigned devices and virtio devices. You
> > don't trust your assigned device vendor not to corrupt your memory so
> > you want to limit the damage your assigned device can do to your guest,
> > so you use an IOMMU for that.  Thus existing iommu=pt within guest is out.
> > 
> > But you trust your hypervisor (you have no choice anyway),
> > and you don't want the overhead of tweaking IOMMU
> > on data path for virtio. Thus iommu=on is out too.
> 
> IOMMUs on x86 usually come with an ACPI table that describes which
> IOMMUs are in the system and which devices they translate. So you can
> easily describe all devices there that are not behind an IOMMU.
> 
> The ACPI table is built by the BIOS, and the platform intialization code
> sets the device dma_ops accordingly. If the BIOS provides wrong
> information in the ACPI table this is a platform bug.

It doesn't look like I managed to put the point across.
My point is that IOMMU is required to do things like
userspace drivers, what we need is a way to express
"there is an IOMMU but it is part of device itself, use passthrough
 unless your driver is untrusted".

> > I'm not sure what ACPI has to do with it.  It's about a way for guest
> > users to specify whether they want to bypass an IOMMU for a given
> > device.
> 
> We have no way yet to request passthrough-mode per-device from the IOMMU
> drivers, but that can easily be added. But as I see it:
> 
> > By the way, a bunch of code is missing on the QEMU side
> > to make this useful:
> > 1. virtio ignores the iommu
> > 2. vhost user ignores the iommu
> > 3. dataplane ignores the iommu
> > 4. vhost-net ignores the iommu
> > 5. VFIO ignores the iommu
> 
> Qemu does not implement IOMMU translation for virtio devices anyway
> (which is fine), so it just should tell the guest so in the ACPI table
> built to describe the emulated IOMMU.
> 
> 
> 	Joerg

This is a short term limitation.



^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-11  9:11                             ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11  9:11 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On Sat, Oct 31, 2015 at 12:16:12AM +0900, Joerg Roedel wrote:
> On Thu, Oct 29, 2015 at 11:01:41AM +0200, Michael S. Tsirkin wrote:
> > Example: you have a mix of assigned devices and virtio devices. You
> > don't trust your assigned device vendor not to corrupt your memory so
> > you want to limit the damage your assigned device can do to your guest,
> > so you use an IOMMU for that.  Thus existing iommu=pt within guest is out.
> > 
> > But you trust your hypervisor (you have no choice anyway),
> > and you don't want the overhead of tweaking IOMMU
> > on data path for virtio. Thus iommu=on is out too.
> 
> IOMMUs on x86 usually come with an ACPI table that describes which
> IOMMUs are in the system and which devices they translate. So you can
> easily describe all devices there that are not behind an IOMMU.
> 
> The ACPI table is built by the BIOS, and the platform intialization code
> sets the device dma_ops accordingly. If the BIOS provides wrong
> information in the ACPI table this is a platform bug.

It doesn't look like I managed to put the point across.
My point is that IOMMU is required to do things like
userspace drivers, what we need is a way to express
"there is an IOMMU but it is part of device itself, use passthrough
 unless your driver is untrusted".

> > I'm not sure what ACPI has to do with it.  It's about a way for guest
> > users to specify whether they want to bypass an IOMMU for a given
> > device.
> 
> We have no way yet to request passthrough-mode per-device from the IOMMU
> drivers, but that can easily be added. But as I see it:
> 
> > By the way, a bunch of code is missing on the QEMU side
> > to make this useful:
> > 1. virtio ignores the iommu
> > 2. vhost user ignores the iommu
> > 3. dataplane ignores the iommu
> > 4. vhost-net ignores the iommu
> > 5. VFIO ignores the iommu
> 
> Qemu does not implement IOMMU translation for virtio devices anyway
> (which is fine), so it just should tell the guest so in the ACPI table
> built to describe the emulated IOMMU.
> 
> 
> 	Joerg

This is a short term limitation.

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-10 18:54                                   ` Andy Lutomirski
@ 2015-11-11 10:05                                     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11 10:05 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	David Woodhouse, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Christoph Hellwig,
	Linux Virtualization, linux-kernel, KVM

On Tue, Nov 10, 2015 at 10:54:21AM -0800, Andy Lutomirski wrote:
> On Nov 10, 2015 7:02 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >
> > On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
> > > On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> > > > I have no problem with that. For example, can we teach
> > > > the DMA API on intel x86 to use PT for virtio by default?
> > > > That would allow merging Andy's patches with
> > > > full compatibility with old guests and hosts.
> > >
> > > Well, the only incompatibility comes from an experimental qemu feature,
> > > more explicitly from a bug in that feature's implementation. So why
> > > should we work around that in the kernel? I think it is not too hard to
> > > fix qemu to generate a correct DMAR table which excludes the virtio
> > > devices from iommu translation.
> > >
> > >
> > >       Joerg
> >
> > It's not that easy - you'd have to dedicate some buses
> > for iommu bypass, and teach management tools to only put
> > virtio there - but it's possible.
> >
> > This will absolutely address guests that don't need to set up IOMMU for
> > virtio devices, and virtio that bypasses the IOMMU.
> >
> > But the problem is that we do want to *allow* guests
> > to set up IOMMU for virtio devices.
> > In that case, these are two other usecases:
> >
> > A- monolithic virtio within QEMU:
> >         iommu only needed for VFIO ->
> >         guest should always use iommu=pt
> >         iommu=on works but is just useless overhead.
> >
> > B- modular out of process virtio outside QEMU:
> >         iommu needed for VFIO or kernel driver ->
> >         guest should use iommu=pt or iommu=on
> >         depending on security/performance requirements
> >
> > Note that there could easily be a mix of these in the same system.
> >
> > So for these cases we do need QEMU to specify to guest that IOMMU covers
> > the virtio devices.  Also, once one does this, the default on linux is
> > iommu=on and not pt, which works but ATM is very slow.
> >
> > This poses three problems:
> >
> > 1. How do we address the different needs of A and B?
> >    One way would be for virtio to pass the information to guest
> >    using some virtio specific way, and have drivers
> >    specify what kind of DMA access they want.
> >
> > 2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
> >    use the more sensible iommu=pt.
> >
> > 3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
> >    Creating different hypervisor configurations depending on guest is very nasty.
> >    Again, one way would be some virtio specific interface.
> >
> > I'd rather we figured the answers to this before merging Andy's patches
> > because I'm concerned that instead of 1 broken configuration
> > (virtio always bypasses IOMMU) we'll get two bad configurations
> > (in the second one, virtio uses the slow default with no
> > gain in security).
> >
> > Suggestions welcome.
> 
> I think there's still no downside of using my patches, even on x86.
> 
> Old kernels on new QEMU work unless IOMMU is enabled on the host.  I
> think that's the best we can possibly do.
> New kernels work at full speed on old QEMU.

Only if IOMMU is disabled, right?

> New kernels with new QEMU and iommu enabled work slower.  Even newer
> kernels with default passthrough work at full speed, and there's no
> obvious downside to the existence of kernels with just my patches.
> 
> --Andy
> 

I tried to explain the possible downside. Let me try again.  Imagine
that guest kernel notifies hypervisor that it wants IOMMU to actually
work.  This will make old kernel on new QEMU work even with IOMMU
enabled on host - better than "the best we can do" that you described
above.  Specifically, QEMU will assume that if it didn't get
notification, it's an old kernel so it should ignore the IOMMU.

But if we apply your patches this trick won't work.

Without implementing it all, I think the easiest incremental step would
be to teach linux to make passthrough the default when running as a
guest on top of QEMU, put your patches on top. If someone specifies
non passthrough on command line it'll still be broken,
but not too bad.


> >
> > --
> > MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-11 10:05                                     ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-11 10:05 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini, David Woodhouse,
	Christoph Hellwig

On Tue, Nov 10, 2015 at 10:54:21AM -0800, Andy Lutomirski wrote:
> On Nov 10, 2015 7:02 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >
> > On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
> > > On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
> > > > I have no problem with that. For example, can we teach
> > > > the DMA API on intel x86 to use PT for virtio by default?
> > > > That would allow merging Andy's patches with
> > > > full compatibility with old guests and hosts.
> > >
> > > Well, the only incompatibility comes from an experimental qemu feature,
> > > more explicitly from a bug in that feature's implementation. So why
> > > should we work around that in the kernel? I think it is not too hard to
> > > fix qemu to generate a correct DMAR table which excludes the virtio
> > > devices from iommu translation.
> > >
> > >
> > >       Joerg
> >
> > It's not that easy - you'd have to dedicate some buses
> > for iommu bypass, and teach management tools to only put
> > virtio there - but it's possible.
> >
> > This will absolutely address guests that don't need to set up IOMMU for
> > virtio devices, and virtio that bypasses the IOMMU.
> >
> > But the problem is that we do want to *allow* guests
> > to set up IOMMU for virtio devices.
> > In that case, these are two other usecases:
> >
> > A- monolithic virtio within QEMU:
> >         iommu only needed for VFIO ->
> >         guest should always use iommu=pt
> >         iommu=on works but is just useless overhead.
> >
> > B- modular out of process virtio outside QEMU:
> >         iommu needed for VFIO or kernel driver ->
> >         guest should use iommu=pt or iommu=on
> >         depending on security/performance requirements
> >
> > Note that there could easily be a mix of these in the same system.
> >
> > So for these cases we do need QEMU to specify to guest that IOMMU covers
> > the virtio devices.  Also, once one does this, the default on linux is
> > iommu=on and not pt, which works but ATM is very slow.
> >
> > This poses three problems:
> >
> > 1. How do we address the different needs of A and B?
> >    One way would be for virtio to pass the information to guest
> >    using some virtio specific way, and have drivers
> >    specify what kind of DMA access they want.
> >
> > 2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
> >    use the more sensible iommu=pt.
> >
> > 3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
> >    Creating different hypervisor configurations depending on guest is very nasty.
> >    Again, one way would be some virtio specific interface.
> >
> > I'd rather we figured the answers to this before merging Andy's patches
> > because I'm concerned that instead of 1 broken configuration
> > (virtio always bypasses IOMMU) we'll get two bad configurations
> > (in the second one, virtio uses the slow default with no
> > gain in security).
> >
> > Suggestions welcome.
> 
> I think there's still no downside of using my patches, even on x86.
> 
> Old kernels on new QEMU work unless IOMMU is enabled on the host.  I
> think that's the best we can possibly do.
> New kernels work at full speed on old QEMU.

Only if IOMMU is disabled, right?

> New kernels with new QEMU and iommu enabled work slower.  Even newer
> kernels with default passthrough work at full speed, and there's no
> obvious downside to the existence of kernels with just my patches.
> 
> --Andy
> 

I tried to explain the possible downside. Let me try again.  Imagine
that guest kernel notifies hypervisor that it wants IOMMU to actually
work.  This will make old kernel on new QEMU work even with IOMMU
enabled on host - better than "the best we can do" that you described
above.  Specifically, QEMU will assume that if it didn't get
notification, it's an old kernel so it should ignore the IOMMU.

But if we apply your patches this trick won't work.

Without implementing it all, I think the easiest incremental step would
be to teach linux to make passthrough the default when running as a
guest on top of QEMU, put your patches on top. If someone specifies
non passthrough on command line it'll still be broken,
but not too bad.


> >
> > --
> > MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-11 10:05                                     ` Michael S. Tsirkin
  (?)
@ 2015-11-11 15:56                                     ` Andy Lutomirski
  2015-11-11 22:30                                       ` David Woodhouse
  2015-11-11 22:30                                       ` David Woodhouse
  -1 siblings, 2 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-11-11 15:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	David Woodhouse, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Christoph Hellwig,
	Linux Virtualization, linux-kernel, KVM

On Wed, Nov 11, 2015 at 2:05 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Tue, Nov 10, 2015 at 10:54:21AM -0800, Andy Lutomirski wrote:
>> On Nov 10, 2015 7:02 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>> >
>> > On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
>> > > On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
>> > > > I have no problem with that. For example, can we teach
>> > > > the DMA API on intel x86 to use PT for virtio by default?
>> > > > That would allow merging Andy's patches with
>> > > > full compatibility with old guests and hosts.
>> > >
>> > > Well, the only incompatibility comes from an experimental qemu feature,
>> > > more explicitly from a bug in that feature's implementation. So why
>> > > should we work around that in the kernel? I think it is not too hard to
>> > > fix qemu to generate a correct DMAR table which excludes the virtio
>> > > devices from iommu translation.
>> > >
>> > >
>> > >       Joerg
>> >
>> > It's not that easy - you'd have to dedicate some buses
>> > for iommu bypass, and teach management tools to only put
>> > virtio there - but it's possible.
>> >
>> > This will absolutely address guests that don't need to set up IOMMU for
>> > virtio devices, and virtio that bypasses the IOMMU.
>> >
>> > But the problem is that we do want to *allow* guests
>> > to set up IOMMU for virtio devices.
>> > In that case, these are two other usecases:
>> >
>> > A- monolithic virtio within QEMU:
>> >         iommu only needed for VFIO ->
>> >         guest should always use iommu=pt
>> >         iommu=on works but is just useless overhead.
>> >
>> > B- modular out of process virtio outside QEMU:
>> >         iommu needed for VFIO or kernel driver ->
>> >         guest should use iommu=pt or iommu=on
>> >         depending on security/performance requirements
>> >
>> > Note that there could easily be a mix of these in the same system.
>> >
>> > So for these cases we do need QEMU to specify to guest that IOMMU covers
>> > the virtio devices.  Also, once one does this, the default on linux is
>> > iommu=on and not pt, which works but ATM is very slow.
>> >
>> > This poses three problems:
>> >
>> > 1. How do we address the different needs of A and B?
>> >    One way would be for virtio to pass the information to guest
>> >    using some virtio specific way, and have drivers
>> >    specify what kind of DMA access they want.
>> >
>> > 2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
>> >    use the more sensible iommu=pt.
>> >
>> > 3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
>> >    Creating different hypervisor configurations depending on guest is very nasty.
>> >    Again, one way would be some virtio specific interface.
>> >
>> > I'd rather we figured the answers to this before merging Andy's patches
>> > because I'm concerned that instead of 1 broken configuration
>> > (virtio always bypasses IOMMU) we'll get two bad configurations
>> > (in the second one, virtio uses the slow default with no
>> > gain in security).
>> >
>> > Suggestions welcome.
>>
>> I think there's still no downside of using my patches, even on x86.
>>
>> Old kernels on new QEMU work unless IOMMU is enabled on the host.  I
>> think that's the best we can possibly do.
>> New kernels work at full speed on old QEMU.
>
> Only if IOMMU is disabled, right?
>
>> New kernels with new QEMU and iommu enabled work slower.  Even newer
>> kernels with default passthrough work at full speed, and there's no
>> obvious downside to the existence of kernels with just my patches.
>>
>> --Andy
>>
>
> I tried to explain the possible downside. Let me try again.  Imagine
> that guest kernel notifies hypervisor that it wants IOMMU to actually
> work.  This will make old kernel on new QEMU work even with IOMMU
> enabled on host - better than "the best we can do" that you described
> above.  Specifically, QEMU will assume that if it didn't get
> notification, it's an old kernel so it should ignore the IOMMU.

Can you flesh out this trick?

On x86 IIUC the IOMMU more-or-less defaults to passthrough.  If the
kernel wants, it can switch it to a non-passthrough mode.  My patches
cause the virtio driver to do exactly this, except that the host
implementation doesn't actually exist yet, so the patches will instead
have no particular effect.

On powerpc and sparc, we *already* screwed up.  The host already tells
the guest that there's an IOMMU and that it's *enabled* because those
platforms don't have selective IOMMU coverage the way that x86 does.
So we need to work around it.

I think that, if we want fancy virt-friendly IOMMU stuff like you're
talking about, then the right thing to do is to create a virtio bus
instead of pretending to be PCI.  That bus could have a virtio IOMMU
and its own cross-platform enumeration mechanism for devices on the
bus, and everything would be peachy.

In the mean time, there are existing mechanisms by which every PCI
driver is supposed to notify the host/platform of how it intends to
map DMA memory, and virtio gets it wrong.

>
> But if we apply your patches this trick won't work.
>

I still don't understand what trick.  If we want virtio devices to be
assignable, then they should be translated through the IOMMU, and the
DMA API is the right interface for that.

> Without implementing it all, I think the easiest incremental step would
> be to teach linux to make passthrough the default when running as a
> guest on top of QEMU, put your patches on top. If someone specifies
> non passthrough on command line it'll still be broken,
> but not too bad.

Can powerpc and sparc do exact 1:1 passthrough for a given device?  If
so, that might be a reasonable way forward.  After all, if a new
powerpc kernel asks for exact passthrough (dma addr = phys addr with
no offset at all), then old QEMU will just ignore it and therefore
accidentally get it right.  Ben?

--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-11 10:05                                     ` Michael S. Tsirkin
  (?)
  (?)
@ 2015-11-11 15:56                                     ` Andy Lutomirski
  -1 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-11-11 15:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini, David Woodhouse,
	Christoph Hellwig

On Wed, Nov 11, 2015 at 2:05 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Tue, Nov 10, 2015 at 10:54:21AM -0800, Andy Lutomirski wrote:
>> On Nov 10, 2015 7:02 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>> >
>> > On Sun, Nov 08, 2015 at 12:49:46PM +0100, Joerg Roedel wrote:
>> > > On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
>> > > > I have no problem with that. For example, can we teach
>> > > > the DMA API on intel x86 to use PT for virtio by default?
>> > > > That would allow merging Andy's patches with
>> > > > full compatibility with old guests and hosts.
>> > >
>> > > Well, the only incompatibility comes from an experimental qemu feature,
>> > > more explicitly from a bug in that feature's implementation. So why
>> > > should we work around that in the kernel? I think it is not too hard to
>> > > fix qemu to generate a correct DMAR table which excludes the virtio
>> > > devices from iommu translation.
>> > >
>> > >
>> > >       Joerg
>> >
>> > It's not that easy - you'd have to dedicate some buses
>> > for iommu bypass, and teach management tools to only put
>> > virtio there - but it's possible.
>> >
>> > This will absolutely address guests that don't need to set up IOMMU for
>> > virtio devices, and virtio that bypasses the IOMMU.
>> >
>> > But the problem is that we do want to *allow* guests
>> > to set up IOMMU for virtio devices.
>> > In that case, these are two other usecases:
>> >
>> > A- monolithic virtio within QEMU:
>> >         iommu only needed for VFIO ->
>> >         guest should always use iommu=pt
>> >         iommu=on works but is just useless overhead.
>> >
>> > B- modular out of process virtio outside QEMU:
>> >         iommu needed for VFIO or kernel driver ->
>> >         guest should use iommu=pt or iommu=on
>> >         depending on security/performance requirements
>> >
>> > Note that there could easily be a mix of these in the same system.
>> >
>> > So for these cases we do need QEMU to specify to guest that IOMMU covers
>> > the virtio devices.  Also, once one does this, the default on linux is
>> > iommu=on and not pt, which works but ATM is very slow.
>> >
>> > This poses three problems:
>> >
>> > 1. How do we address the different needs of A and B?
>> >    One way would be for virtio to pass the information to guest
>> >    using some virtio specific way, and have drivers
>> >    specify what kind of DMA access they want.
>> >
>> > 2. (Kind of a subset of 1) once we do allow IOMMU, how do we make sure most guests
>> >    use the more sensible iommu=pt.
>> >
>> > 3. Once we do allow IOMMU, how can we keep existing guests work in this configuration?
>> >    Creating different hypervisor configurations depending on guest is very nasty.
>> >    Again, one way would be some virtio specific interface.
>> >
>> > I'd rather we figured the answers to this before merging Andy's patches
>> > because I'm concerned that instead of 1 broken configuration
>> > (virtio always bypasses IOMMU) we'll get two bad configurations
>> > (in the second one, virtio uses the slow default with no
>> > gain in security).
>> >
>> > Suggestions welcome.
>>
>> I think there's still no downside of using my patches, even on x86.
>>
>> Old kernels on new QEMU work unless IOMMU is enabled on the host.  I
>> think that's the best we can possibly do.
>> New kernels work at full speed on old QEMU.
>
> Only if IOMMU is disabled, right?
>
>> New kernels with new QEMU and iommu enabled work slower.  Even newer
>> kernels with default passthrough work at full speed, and there's no
>> obvious downside to the existence of kernels with just my patches.
>>
>> --Andy
>>
>
> I tried to explain the possible downside. Let me try again.  Imagine
> that guest kernel notifies hypervisor that it wants IOMMU to actually
> work.  This will make old kernel on new QEMU work even with IOMMU
> enabled on host - better than "the best we can do" that you described
> above.  Specifically, QEMU will assume that if it didn't get
> notification, it's an old kernel so it should ignore the IOMMU.

Can you flesh out this trick?

On x86 IIUC the IOMMU more-or-less defaults to passthrough.  If the
kernel wants, it can switch it to a non-passthrough mode.  My patches
cause the virtio driver to do exactly this, except that the host
implementation doesn't actually exist yet, so the patches will instead
have no particular effect.

On powerpc and sparc, we *already* screwed up.  The host already tells
the guest that there's an IOMMU and that it's *enabled* because those
platforms don't have selective IOMMU coverage the way that x86 does.
So we need to work around it.

I think that, if we want fancy virt-friendly IOMMU stuff like you're
talking about, then the right thing to do is to create a virtio bus
instead of pretending to be PCI.  That bus could have a virtio IOMMU
and its own cross-platform enumeration mechanism for devices on the
bus, and everything would be peachy.

In the mean time, there are existing mechanisms by which every PCI
driver is supposed to notify the host/platform of how it intends to
map DMA memory, and virtio gets it wrong.

>
> But if we apply your patches this trick won't work.
>

I still don't understand what trick.  If we want virtio devices to be
assignable, then they should be translated through the IOMMU, and the
DMA API is the right interface for that.

> Without implementing it all, I think the easiest incremental step would
> be to teach linux to make passthrough the default when running as a
> guest on top of QEMU, put your patches on top. If someone specifies
> non passthrough on command line it'll still be broken,
> but not too bad.

Can powerpc and sparc do exact 1:1 passthrough for a given device?  If
so, that might be a reasonable way forward.  After all, if a new
powerpc kernel asks for exact passthrough (dma addr = phys addr with
no offset at all), then old QEMU will just ignore it and therefore
accidentally get it right.  Ben?

--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-11 15:56                                     ` Andy Lutomirski
  2015-11-11 22:30                                       ` David Woodhouse
@ 2015-11-11 22:30                                       ` David Woodhouse
  2015-11-12 11:09                                           ` Michael S. Tsirkin
  1 sibling, 1 reply; 110+ messages in thread
From: David Woodhouse @ 2015-11-11 22:30 UTC (permalink / raw)
  To: Andy Lutomirski, Michael S. Tsirkin
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	Martin Schwidefsky, Sebastian Ott, linux-s390, Cornelia Huck,
	Joerg Roedel, Christoph Hellwig, Linux Virtualization,
	linux-kernel, KVM

[-- Attachment #1: Type: text/plain, Size: 3666 bytes --]

On Wed, 2015-11-11 at 07:56 -0800, Andy Lutomirski wrote:
> 
> Can you flesh out this trick?
> 
> On x86 IIUC the IOMMU more-or-less defaults to passthrough.  If the
> kernel wants, it can switch it to a non-passthrough mode.  My patches
> cause the virtio driver to do exactly this, except that the host
> implementation doesn't actually exist yet, so the patches will instead
> have no particular effect.

At some level, yes — we're compatible with a 1982 IBM PC and thus the
IOMMU is entirely disabled at boot until the kernel turns it on —
except in TXT mode where we abandon that compatibility.

But no, the virtio driver has *nothing* to do with switching the device
out of passthrough mode. It is either in passthrough mode, or it isn't.

If the VMM *doesn't* expose an IOMMU to the guest, obviously the
devices are in passthrough mode. If the guest kernel doesn't have IOMMU
support enabled, then obviously the devices are in passthrough mode.
And if the ACPI tables exposed to the guest kernel *tell* it that the
virtio devices are not actually behind the IOMMU (which qemu gets
wrong), then it'll be in passthrough mode.

If the IOMMU is exposed, and enabled, and telling the guest kernel that
it *does* cover the virtio devices, then those virtio devices will
*not* be in passthrough mode.

You choosing to use the DMA API in the virtio device drivers instead of
being buggy, has nothing to do with whether it's actually in
passthrough mode or not. Whether it's in passthrough mode or not, using
the DMA API is technically the right thing to do — because it should
either *do* the translation, or return a 1:1 mapped IOVA, as
appropriate.


> On powerpc and sparc, we *already* screwed up.  The host already tells
> the guest that there's an IOMMU and that it's *enabled* because those
> platforms don't have selective IOMMU coverage the way that x86 does.
> So we need to work around it.

No, we need it on x86 too because once we fix the virtio device driver
bug and make it start using the DMA API, then we start to trip up on
the qemu bug where it lies about which devices are covered by the
IOMMU.

Of course, we still have that same qemu bug w.r.t. assigned devices,
which it *also* claims are behind its IOMMU when they're not...

> I think that, if we want fancy virt-friendly IOMMU stuff like you're
> talking about, then the right thing to do is to create a virtio bus
> instead of pretending to be PCI.  That bus could have a virtio IOMMU
> and its own cross-platform enumeration mechanism for devices on the
> bus, and everything would be peachy.

That doesn't really help very much for the x86 case where the problem
is compatibility with *existing* (arguably broken) qemu
implementations.

Having said that, if this were real hardware I'd just be blacklisting
it and saying "Another BIOS with broken DMAR tables --> IOMMU
completely disabled". So perhaps we should just do that.


> I still don't understand what trick.  If we want virtio devices to be
> assignable, then they should be translated through the IOMMU, and the
> DMA API is the right interface for that.

The DMA API is the right interface *regardless* of whether there's
actual translation to be done. The device driver itself should not be
involved in any way with that decision.

When you want to access MMIO, you use ioremap() and writel() instead of
doing random crap for yourself. When you want DMA, you use the DMA API
to get a bus address for your device *even* if you expect there to be
no IOMMU and you expect it to precisely match the physical address. No
excuses.

-- 
dwmw2



[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-11 15:56                                     ` Andy Lutomirski
@ 2015-11-11 22:30                                       ` David Woodhouse
  2015-11-11 22:30                                       ` David Woodhouse
  1 sibling, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-11 22:30 UTC (permalink / raw)
  To: Andy Lutomirski, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini,
	Christoph Hellwig


[-- Attachment #1.1: Type: text/plain, Size: 3666 bytes --]

On Wed, 2015-11-11 at 07:56 -0800, Andy Lutomirski wrote:
> 
> Can you flesh out this trick?
> 
> On x86 IIUC the IOMMU more-or-less defaults to passthrough.  If the
> kernel wants, it can switch it to a non-passthrough mode.  My patches
> cause the virtio driver to do exactly this, except that the host
> implementation doesn't actually exist yet, so the patches will instead
> have no particular effect.

At some level, yes — we're compatible with a 1982 IBM PC and thus the
IOMMU is entirely disabled at boot until the kernel turns it on —
except in TXT mode where we abandon that compatibility.

But no, the virtio driver has *nothing* to do with switching the device
out of passthrough mode. It is either in passthrough mode, or it isn't.

If the VMM *doesn't* expose an IOMMU to the guest, obviously the
devices are in passthrough mode. If the guest kernel doesn't have IOMMU
support enabled, then obviously the devices are in passthrough mode.
And if the ACPI tables exposed to the guest kernel *tell* it that the
virtio devices are not actually behind the IOMMU (which qemu gets
wrong), then it'll be in passthrough mode.

If the IOMMU is exposed, and enabled, and telling the guest kernel that
it *does* cover the virtio devices, then those virtio devices will
*not* be in passthrough mode.

You choosing to use the DMA API in the virtio device drivers instead of
being buggy, has nothing to do with whether it's actually in
passthrough mode or not. Whether it's in passthrough mode or not, using
the DMA API is technically the right thing to do — because it should
either *do* the translation, or return a 1:1 mapped IOVA, as
appropriate.


> On powerpc and sparc, we *already* screwed up.  The host already tells
> the guest that there's an IOMMU and that it's *enabled* because those
> platforms don't have selective IOMMU coverage the way that x86 does.
> So we need to work around it.

No, we need it on x86 too because once we fix the virtio device driver
bug and make it start using the DMA API, then we start to trip up on
the qemu bug where it lies about which devices are covered by the
IOMMU.

Of course, we still have that same qemu bug w.r.t. assigned devices,
which it *also* claims are behind its IOMMU when they're not...

> I think that, if we want fancy virt-friendly IOMMU stuff like you're
> talking about, then the right thing to do is to create a virtio bus
> instead of pretending to be PCI.  That bus could have a virtio IOMMU
> and its own cross-platform enumeration mechanism for devices on the
> bus, and everything would be peachy.

That doesn't really help very much for the x86 case where the problem
is compatibility with *existing* (arguably broken) qemu
implementations.

Having said that, if this were real hardware I'd just be blacklisting
it and saying "Another BIOS with broken DMAR tables --> IOMMU
completely disabled". So perhaps we should just do that.


> I still don't understand what trick.  If we want virtio devices to be
> assignable, then they should be translated through the IOMMU, and the
> DMA API is the right interface for that.

The DMA API is the right interface *regardless* of whether there's
actual translation to be done. The device driver itself should not be
involved in any way with that decision.

When you want to access MMIO, you use ioremap() and writel() instead of
doing random crap for yourself. When you want DMA, you use the DMA API
to get a bus address for your device *even* if you expect there to be
no IOMMU and you expect it to precisely match the physical address. No
excuses.

-- 
dwmw2



[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-11 22:30                                       ` David Woodhouse
@ 2015-11-12 11:09                                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-12 11:09 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andy Lutomirski, Benjamin Herrenschmidt, Christian Borntraeger,
	Paolo Bonzini, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Christoph Hellwig,
	Linux Virtualization, linux-kernel, KVM

On Wed, Nov 11, 2015 at 11:30:27PM +0100, David Woodhouse wrote:
> On Wed, 2015-11-11 at 07:56 -0800, Andy Lutomirski wrote:
> > 
> > Can you flesh out this trick?
> > 
> > On x86 IIUC the IOMMU more-or-less defaults to passthrough.  If the
> > kernel wants, it can switch it to a non-passthrough mode.  My patches
> > cause the virtio driver to do exactly this, except that the host
> > implementation doesn't actually exist yet, so the patches will instead
> > have no particular effect.
> 
> At some level, yes — we're compatible with a 1982 IBM PC and thus the
> IOMMU is entirely disabled at boot until the kernel turns it on —
> except in TXT mode where we abandon that compatibility.
> 
> But no, the virtio driver has *nothing* to do with switching the device
> out of passthrough mode. It is either in passthrough mode, or it isn't.
> 
> If the VMM *doesn't* expose an IOMMU to the guest, obviously the
> devices are in passthrough mode. If the guest kernel doesn't have IOMMU
> support enabled, then obviously the devices are in passthrough mode.
> And if the ACPI tables exposed to the guest kernel *tell* it that the
> virtio devices are not actually behind the IOMMU (which qemu gets
> wrong), then it'll be in passthrough mode.
> 
> If the IOMMU is exposed, and enabled, and telling the guest kernel that
> it *does* cover the virtio devices, then those virtio devices will
> *not* be in passthrough mode.

This we need to fix. Because in most configurations if you are
using kernel drivers, then you don't want IOMMU with virtio,
but if you are using VFIO then you do.

Intel's iommu can be programmed to still
do a kind of passthrough (1:1) mapping, it's
just a matter of doing this for virtio devices
when not using VFIO.

> You choosing to use the DMA API in the virtio device drivers instead of
> being buggy, has nothing to do with whether it's actually in
> passthrough mode or not. Whether it's in passthrough mode or not, using
> the DMA API is technically the right thing to do — because it should
> either *do* the translation, or return a 1:1 mapped IOVA, as
> appropriate.

Right but first we need to actually make DMA API do the right thing
at least on x86,ppc and arm.

> > On powerpc and sparc, we *already* screwed up.  The host already tells
> > the guest that there's an IOMMU and that it's *enabled* because those
> > platforms don't have selective IOMMU coverage the way that x86 does.
> > So we need to work around it.
> 
> No, we need it on x86 too because once we fix the virtio device driver
> bug and make it start using the DMA API, then we start to trip up on
> the qemu bug where it lies about which devices are covered by the
> IOMMU.
> 
> Of course, we still have that same qemu bug w.r.t. assigned devices,
> which it *also* claims are behind its IOMMU when they're not...

I'm not worried about qemu bugs that much.  I am interested in being
able to use both VFIO and kernel drivers with virtio devices with good
performance and without tweaking kernel parameters.


> > I think that, if we want fancy virt-friendly IOMMU stuff like you're
> > talking about, then the right thing to do is to create a virtio bus
> > instead of pretending to be PCI.  That bus could have a virtio IOMMU
> > and its own cross-platform enumeration mechanism for devices on the
> > bus, and everything would be peachy.
> 
> That doesn't really help very much for the x86 case where the problem
> is compatibility with *existing* (arguably broken) qemu
> implementations.
> 
> Having said that, if this were real hardware I'd just be blacklisting
> it and saying "Another BIOS with broken DMAR tables --> IOMMU
> completely disabled". So perhaps we should just do that.
> 

Yes, once there is new QEMU where virtio is covered by the IOMMU,
that would be one way to address existing QEMU bugs. 

> > I still don't understand what trick.  If we want virtio devices to be
> > assignable, then they should be translated through the IOMMU, and the
> > DMA API is the right interface for that.
> 
> The DMA API is the right interface *regardless* of whether there's
> actual translation to be done. The device driver itself should not be
> involved in any way with that decision.

With virt, each device can have different privileges:
some are part of hypervisor so with a kernel driver
trying to get protection from them using an IOMMU which is also
part of hypervisor makes no sense - but when using a
userspace driver then getting protection from the userspace
driver does make sense. Others are real devices so
getting protection from them makes some sense.

Which is which? It's easiest for the device driver itself to
gain that knowledge. Please note this is *not* the same
question as whether a specific device is covered by an IOMMU.

> When you want to access MMIO, you use ioremap() and writel() instead of
> doing random crap for yourself. When you want DMA, you use the DMA API
> to get a bus address for your device *even* if you expect there to be
> no IOMMU and you expect it to precisely match the physical address. No
> excuses.

No problem, but the fact remains that virtio does need
per-device control over whether it's passthrough or not.

Forget the bugs, that's not the issue - the issue is
that it's sometimes part of hypervisor and
sometimes isn't.

We just can't say it's always not a part of hypervisor so you always
want maximum protection - that drops performance to the floor.

Linux doesn't seem to support that usecase at the moment, if this is a
generic problem then we need to teach Linux to solve it, but if virtio
is unique in this requirement, then we should just keep doing virtio
specific things to solve it.


> -- 
> dwmw2
> 
> 



^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-12 11:09                                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-12 11:09 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig

On Wed, Nov 11, 2015 at 11:30:27PM +0100, David Woodhouse wrote:
> On Wed, 2015-11-11 at 07:56 -0800, Andy Lutomirski wrote:
> > 
> > Can you flesh out this trick?
> > 
> > On x86 IIUC the IOMMU more-or-less defaults to passthrough.  If the
> > kernel wants, it can switch it to a non-passthrough mode.  My patches
> > cause the virtio driver to do exactly this, except that the host
> > implementation doesn't actually exist yet, so the patches will instead
> > have no particular effect.
> 
> At some level, yes — we're compatible with a 1982 IBM PC and thus the
> IOMMU is entirely disabled at boot until the kernel turns it on —
> except in TXT mode where we abandon that compatibility.
> 
> But no, the virtio driver has *nothing* to do with switching the device
> out of passthrough mode. It is either in passthrough mode, or it isn't.
> 
> If the VMM *doesn't* expose an IOMMU to the guest, obviously the
> devices are in passthrough mode. If the guest kernel doesn't have IOMMU
> support enabled, then obviously the devices are in passthrough mode.
> And if the ACPI tables exposed to the guest kernel *tell* it that the
> virtio devices are not actually behind the IOMMU (which qemu gets
> wrong), then it'll be in passthrough mode.
> 
> If the IOMMU is exposed, and enabled, and telling the guest kernel that
> it *does* cover the virtio devices, then those virtio devices will
> *not* be in passthrough mode.

This we need to fix. Because in most configurations if you are
using kernel drivers, then you don't want IOMMU with virtio,
but if you are using VFIO then you do.

Intel's iommu can be programmed to still
do a kind of passthrough (1:1) mapping, it's
just a matter of doing this for virtio devices
when not using VFIO.

> You choosing to use the DMA API in the virtio device drivers instead of
> being buggy, has nothing to do with whether it's actually in
> passthrough mode or not. Whether it's in passthrough mode or not, using
> the DMA API is technically the right thing to do — because it should
> either *do* the translation, or return a 1:1 mapped IOVA, as
> appropriate.

Right but first we need to actually make DMA API do the right thing
at least on x86,ppc and arm.

> > On powerpc and sparc, we *already* screwed up.  The host already tells
> > the guest that there's an IOMMU and that it's *enabled* because those
> > platforms don't have selective IOMMU coverage the way that x86 does.
> > So we need to work around it.
> 
> No, we need it on x86 too because once we fix the virtio device driver
> bug and make it start using the DMA API, then we start to trip up on
> the qemu bug where it lies about which devices are covered by the
> IOMMU.
> 
> Of course, we still have that same qemu bug w.r.t. assigned devices,
> which it *also* claims are behind its IOMMU when they're not...

I'm not worried about qemu bugs that much.  I am interested in being
able to use both VFIO and kernel drivers with virtio devices with good
performance and without tweaking kernel parameters.


> > I think that, if we want fancy virt-friendly IOMMU stuff like you're
> > talking about, then the right thing to do is to create a virtio bus
> > instead of pretending to be PCI.  That bus could have a virtio IOMMU
> > and its own cross-platform enumeration mechanism for devices on the
> > bus, and everything would be peachy.
> 
> That doesn't really help very much for the x86 case where the problem
> is compatibility with *existing* (arguably broken) qemu
> implementations.
> 
> Having said that, if this were real hardware I'd just be blacklisting
> it and saying "Another BIOS with broken DMAR tables --> IOMMU
> completely disabled". So perhaps we should just do that.
> 

Yes, once there is new QEMU where virtio is covered by the IOMMU,
that would be one way to address existing QEMU bugs. 

> > I still don't understand what trick.  If we want virtio devices to be
> > assignable, then they should be translated through the IOMMU, and the
> > DMA API is the right interface for that.
> 
> The DMA API is the right interface *regardless* of whether there's
> actual translation to be done. The device driver itself should not be
> involved in any way with that decision.

With virt, each device can have different privileges:
some are part of hypervisor so with a kernel driver
trying to get protection from them using an IOMMU which is also
part of hypervisor makes no sense - but when using a
userspace driver then getting protection from the userspace
driver does make sense. Others are real devices so
getting protection from them makes some sense.

Which is which? It's easiest for the device driver itself to
gain that knowledge. Please note this is *not* the same
question as whether a specific device is covered by an IOMMU.

> When you want to access MMIO, you use ioremap() and writel() instead of
> doing random crap for yourself. When you want DMA, you use the DMA API
> to get a bus address for your device *even* if you expect there to be
> no IOMMU and you expect it to precisely match the physical address. No
> excuses.

No problem, but the fact remains that virtio does need
per-device control over whether it's passthrough or not.

Forget the bugs, that's not the issue - the issue is
that it's sometimes part of hypervisor and
sometimes isn't.

We just can't say it's always not a part of hypervisor so you always
want maximum protection - that drops performance to the floor.

Linux doesn't seem to support that usecase at the moment, if this is a
generic problem then we need to teach Linux to solve it, but if virtio
is unique in this requirement, then we should just keep doing virtio
specific things to solve it.


> -- 
> dwmw2
> 
> 


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-12 11:09                                           ` Michael S. Tsirkin
@ 2015-11-12 12:18                                             ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-12 12:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Andy Lutomirski, Benjamin Herrenschmidt, Christian Borntraeger,
	Paolo Bonzini, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Christoph Hellwig,
	Linux Virtualization, linux-kernel, KVM

[-- Attachment #1: Type: text/plain, Size: 4543 bytes --]

On Thu, 2015-11-12 at 13:09 +0200, Michael S. Tsirkin wrote:
> On Wed, Nov 11, 2015 at 11:30:27PM +0100, David Woodhouse wrote:
> > 
> > If the IOMMU is exposed, and enabled, and telling the guest kernel that
> > it *does* cover the virtio devices, then those virtio devices will
> > *not* be in passthrough mode.
> 
> This we need to fix. Because in most configurations if you are
> using kernel drivers, then you don't want IOMMU with virtio,
> but if you are using VFIO then you do.

This is *absolutely* not specific to virtio. There are *plenty* of
other users (especially networking) where we only really care about the
existence of the IOMMU for VFIO purposes and assigning devices to
guests, and we are willing to dispense with the protection that it
offers for native in-kernel drivers. For that, boot with iommu=pt.

There is no way, currently, to enable the passthrough mode on a per-
device basis. Although it has been discussed right here, very recently.

Let's not conflate those issues.

> > You choosing to use the DMA API in the virtio device drivers instead of
> > being buggy, has nothing to do with whether it's actually in
> > passthrough mode or not. Whether it's in passthrough mode or not, using
> > the DMA API is technically the right thing to do — because it should
> > either *do* the translation, or return a 1:1 mapped IOVA, as
> > appropriate.
> 
> Right but first we need to actually make DMA API do the right thing
> at least on x86,ppc and arm.

It already does the right thing on x86, modulo BIOS bugs (including the
qemu ACPI table but that you said you're not too worried about).

> I'm not worried about qemu bugs that much.  I am interested in being
> able to use both VFIO and kernel drivers with virtio devices with good
> performance and without tweaking kernel parameters.

OK, then you are interested in the semi-orthogonal discussion about
DMA_ATTR_IOMMU_BYPASS. Either way, device drivers SHALL use the DMA
API.


> > Having said that, if this were real hardware I'd just be blacklisting
> > it and saying "Another BIOS with broken DMAR tables --> IOMMU
> > completely disabled". So perhaps we should just do that.
> > 
> Yes, once there is new QEMU where virtio is covered by the IOMMU,
> that would be one way to address existing QEMU bugs. 

No, that's not required. All that's required is to fix the currently-
broken ACPI table so that it *admits* that the virtio devices aren't
covered by the IOMMU. And I've never waited for a fix to be available
before, before blacklisting *other* broken firmwares...

The only reason I'm holding off for now is because ARM and PPC also
need a quirk for their platform code to realise that certain devices
actually *aren't* covered by the IOMMU, and I might be able to just use
the same thing and still enable the IOMMU in the offending qemu
versions.

Although as noted, it would need to cover assigned devices as well as
virtio — qemu currently lies to us and tells us that the emulated IOMMU
in the guest does cover *those* too.

> With virt, each device can have different privileges:
> some are part of hypervisor so with a kernel driver
> trying to get protection from them using an IOMMU which is also
> part of hypervisor makes no sense
>  - but when using a
> userspace driver then getting protection from the userspace
> driver does make sense. Others are real devices so
> getting protection from them makes some sense.
> 
> Which is which? It's easiest for the device driver itself to
> gain that knowledge. Please note this is *not* the same
> question as whether a specific device is covered by an IOMMU.

OK. How does your device driver know whether the virtio PCI device it's
talking to is actually implemented by the hypervisor, or whether it's
one of the real PCI implementations that apparently exist?

> Linux doesn't seem to support that usecase at the moment, if this is a
> generic problem then we need to teach Linux to solve it, but if virtio
> is unique in this requirement, then we should just keep doing virtio
> specific things to solve it.

It is a generic problem. There is a discussion elsewhere about how (or
indeed whether) to solve it. It absolutely isn't virtio-specific, and
we absolutely shouldn't be doing virtio-specific things to solve it.

Nothing excuses just eschewing the correct DMA API. That's just broken,
and only ever worked in conjunction with *other* bugs elsewhere in the
platform.


-- 
dwmw2


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-12 12:18                                             ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-12 12:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig


[-- Attachment #1.1: Type: text/plain, Size: 4543 bytes --]

On Thu, 2015-11-12 at 13:09 +0200, Michael S. Tsirkin wrote:
> On Wed, Nov 11, 2015 at 11:30:27PM +0100, David Woodhouse wrote:
> > 
> > If the IOMMU is exposed, and enabled, and telling the guest kernel that
> > it *does* cover the virtio devices, then those virtio devices will
> > *not* be in passthrough mode.
> 
> This we need to fix. Because in most configurations if you are
> using kernel drivers, then you don't want IOMMU with virtio,
> but if you are using VFIO then you do.

This is *absolutely* not specific to virtio. There are *plenty* of
other users (especially networking) where we only really care about the
existence of the IOMMU for VFIO purposes and assigning devices to
guests, and we are willing to dispense with the protection that it
offers for native in-kernel drivers. For that, boot with iommu=pt.

There is no way, currently, to enable the passthrough mode on a per-
device basis. Although it has been discussed right here, very recently.

Let's not conflate those issues.

> > You choosing to use the DMA API in the virtio device drivers instead of
> > being buggy, has nothing to do with whether it's actually in
> > passthrough mode or not. Whether it's in passthrough mode or not, using
> > the DMA API is technically the right thing to do — because it should
> > either *do* the translation, or return a 1:1 mapped IOVA, as
> > appropriate.
> 
> Right but first we need to actually make DMA API do the right thing
> at least on x86,ppc and arm.

It already does the right thing on x86, modulo BIOS bugs (including the
qemu ACPI table but that you said you're not too worried about).

> I'm not worried about qemu bugs that much.  I am interested in being
> able to use both VFIO and kernel drivers with virtio devices with good
> performance and without tweaking kernel parameters.

OK, then you are interested in the semi-orthogonal discussion about
DMA_ATTR_IOMMU_BYPASS. Either way, device drivers SHALL use the DMA
API.


> > Having said that, if this were real hardware I'd just be blacklisting
> > it and saying "Another BIOS with broken DMAR tables --> IOMMU
> > completely disabled". So perhaps we should just do that.
> > 
> Yes, once there is new QEMU where virtio is covered by the IOMMU,
> that would be one way to address existing QEMU bugs. 

No, that's not required. All that's required is to fix the currently-
broken ACPI table so that it *admits* that the virtio devices aren't
covered by the IOMMU. And I've never waited for a fix to be available
before, before blacklisting *other* broken firmwares...

The only reason I'm holding off for now is because ARM and PPC also
need a quirk for their platform code to realise that certain devices
actually *aren't* covered by the IOMMU, and I might be able to just use
the same thing and still enable the IOMMU in the offending qemu
versions.

Although as noted, it would need to cover assigned devices as well as
virtio — qemu currently lies to us and tells us that the emulated IOMMU
in the guest does cover *those* too.

> With virt, each device can have different privileges:
> some are part of hypervisor so with a kernel driver
> trying to get protection from them using an IOMMU which is also
> part of hypervisor makes no sense
>  - but when using a
> userspace driver then getting protection from the userspace
> driver does make sense. Others are real devices so
> getting protection from them makes some sense.
> 
> Which is which? It's easiest for the device driver itself to
> gain that knowledge. Please note this is *not* the same
> question as whether a specific device is covered by an IOMMU.

OK. How does your device driver know whether the virtio PCI device it's
talking to is actually implemented by the hypervisor, or whether it's
one of the real PCI implementations that apparently exist?

> Linux doesn't seem to support that usecase at the moment, if this is a
> generic problem then we need to teach Linux to solve it, but if virtio
> is unique in this requirement, then we should just keep doing virtio
> specific things to solve it.

It is a generic problem. There is a discussion elsewhere about how (or
indeed whether) to solve it. It absolutely isn't virtio-specific, and
we absolutely shouldn't be doing virtio-specific things to solve it.

Nothing excuses just eschewing the correct DMA API. That's just broken,
and only ever worked in conjunction with *other* bugs elsewhere in the
platform.


-- 
dwmw2


[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
                   ` (8 preceding siblings ...)
  2015-11-19 13:45 ` Michael S. Tsirkin
@ 2015-11-19 13:45 ` Michael S. Tsirkin
  2015-11-19 21:59     ` Andy Lutomirski
  9 siblings, 1 reply; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-19 13:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-kernel, Joerg Roedel, Christian Borntraeger, Cornelia Huck,
	Sebastian Ott, Paolo Bonzini, Christoph Hellwig, benh, KVM,
	dwmw2, Martin Schwidefsky, linux-s390, virtualization

On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> This switches virtio to use the DMA API unconditionally.  I'm sure
> it breaks things, but it seems to work on x86 using virtio-pci, with
> and without Xen, and using both the modern 1.0 variant and the
> legacy variant.

So thinking hard about it, I don't see any real drawbacks to making this
conditional on a new feature bit, that Xen can then set.

As a bonus, host can distinguish between old and new guests using the
feature bit, even though making driver *control* whether IOMMU is
bypassed makes userspace drivers unsafe, so might not be a good idea.

A tiny bit more code but not by much, and we clearly won't
be breaking anything that's not already broken,
and we will be able to drop the extra code later
if we think it's a good idea.

I'll run this by the virtio TC on OASIS next week so we
can reserve a feature bit.

> Changes from v2:
>  - Fix really embarrassing bug.  This version actually works.
> 
> Changes from v1:
>  - Fix an endian conversion error causing a BUG to hit.
>  - Fix a DMA ordering issue (swiotlb=force works now).
>  - Minor cleanups.
> 
> Andy Lutomirski (3):
>   virtio_net: Stop doing DMA from the stack
>   virtio_ring: Support DMA APIs
>   virtio_pci: Use the DMA API
> 
>  drivers/net/virtio_net.c           |  53 +++++++----
>  drivers/virtio/Kconfig             |   2 +-
>  drivers/virtio/virtio_pci_common.h |   3 +-
>  drivers/virtio/virtio_pci_legacy.c |  19 +++-
>  drivers/virtio/virtio_pci_modern.c |  34 +++++--
>  drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
>  tools/virtio/linux/dma-mapping.h   |  17 ++++
>  7 files changed, 246 insertions(+), 69 deletions(-)
>  create mode 100644 tools/virtio/linux/dma-mapping.h
> 
> -- 
> 2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
                   ` (7 preceding siblings ...)
  2015-10-28  7:17   ` Michael S. Tsirkin
@ 2015-11-19 13:45 ` Michael S. Tsirkin
  2015-11-19 13:45 ` Michael S. Tsirkin
  9 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-19 13:45 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, Joerg Roedel, KVM, benh, Sebastian Ott, linux-kernel,
	virtualization, Christian Borntraeger, Paolo Bonzini, dwmw2,
	Christoph Hellwig, Martin Schwidefsky

On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> This switches virtio to use the DMA API unconditionally.  I'm sure
> it breaks things, but it seems to work on x86 using virtio-pci, with
> and without Xen, and using both the modern 1.0 variant and the
> legacy variant.

So thinking hard about it, I don't see any real drawbacks to making this
conditional on a new feature bit, that Xen can then set.

As a bonus, host can distinguish between old and new guests using the
feature bit, even though making driver *control* whether IOMMU is
bypassed makes userspace drivers unsafe, so might not be a good idea.

A tiny bit more code but not by much, and we clearly won't
be breaking anything that's not already broken,
and we will be able to drop the extra code later
if we think it's a good idea.

I'll run this by the virtio TC on OASIS next week so we
can reserve a feature bit.

> Changes from v2:
>  - Fix really embarrassing bug.  This version actually works.
> 
> Changes from v1:
>  - Fix an endian conversion error causing a BUG to hit.
>  - Fix a DMA ordering issue (swiotlb=force works now).
>  - Minor cleanups.
> 
> Andy Lutomirski (3):
>   virtio_net: Stop doing DMA from the stack
>   virtio_ring: Support DMA APIs
>   virtio_pci: Use the DMA API
> 
>  drivers/net/virtio_net.c           |  53 +++++++----
>  drivers/virtio/Kconfig             |   2 +-
>  drivers/virtio/virtio_pci_common.h |   3 +-
>  drivers/virtio/virtio_pci_legacy.c |  19 +++-
>  drivers/virtio/virtio_pci_modern.c |  34 +++++--
>  drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
>  tools/virtio/linux/dma-mapping.h   |  17 ++++
>  7 files changed, 246 insertions(+), 69 deletions(-)
>  create mode 100644 tools/virtio/linux/dma-mapping.h
> 
> -- 
> 2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-19 13:45 ` Michael S. Tsirkin
@ 2015-11-19 21:59     ` Andy Lutomirski
  0 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-11-19 21:59 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	linux-kernel, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, David Woodhouse, KVM

On Nov 19, 2015 5:45 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > This switches virtio to use the DMA API unconditionally.  I'm sure
> > it breaks things, but it seems to work on x86 using virtio-pci, with
> > and without Xen, and using both the modern 1.0 variant and the
> > legacy variant.
>
> So thinking hard about it, I don't see any real drawbacks to making this
> conditional on a new feature bit, that Xen can then set..

Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
virtio device is provided by QEMU, then how does Xen set the bit?
Similarly, how would Xen set the bit for a real physical device?


--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-19 21:59     ` Andy Lutomirski
  0 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-11-19 21:59 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini, David Woodhouse,
	Christoph Hellwig

On Nov 19, 2015 5:45 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
>
> On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > This switches virtio to use the DMA API unconditionally.  I'm sure
> > it breaks things, but it seems to work on x86 using virtio-pci, with
> > and without Xen, and using both the modern 1.0 variant and the
> > legacy variant.
>
> So thinking hard about it, I don't see any real drawbacks to making this
> conditional on a new feature bit, that Xen can then set..

Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
virtio device is provided by QEMU, then how does Xen set the bit?
Similarly, how would Xen set the bit for a real physical device?


--Andy

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-19 21:59     ` Andy Lutomirski
@ 2015-11-19 23:38       ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-19 23:38 UTC (permalink / raw)
  To: Andy Lutomirski, Michael S. Tsirkin
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	linux-kernel, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, KVM

[-- Attachment #1: Type: text/plain, Size: 1498 bytes --]

On Thu, 2015-11-19 at 13:59 -0800, Andy Lutomirski wrote:
> 
> >
> > So thinking hard about it, I don't see any real drawbacks to making this
> > conditional on a new feature bit, that Xen can then set..
> 
> Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> virtio device is provided by QEMU, then how does Xen set the bit?
> Similarly, how would Xen set the bit for a real physical device?

Right. This is *not* a fundamental characteristic of the device. This
is all about how your *particular* hypervisor (in the set of turtles-
all-the-way-down) happened to expose the thing to you.

This is why it lives in the DMAR table, in the Intel world, which
*tells* you which devices are behind which IOMMU (and which are not).
And why I keep repeating myself that it has nothing to do with the
actual device or the virtio drivers.

I understand that POWER and other platforms don't currently have a
clean way to indicate that certain devices don't have translation. And I
understand that we may end up with a *quirk* which ensures that the DMA
API does the right thing (i.e. nothing) in certain cases.

But we should *NOT* be involving the virtio device drivers in that
quirk, in any way. And putting a feature bit in the virtio device
itself doesn't seem at all sane either.

Bear in mind that qemu-system-x86_64 currently has the *same* problem
with assigned physical devices. It's claiming they're translated, and
they're not.

-- 
dwmw2


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-19 23:38       ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-19 23:38 UTC (permalink / raw)
  To: Andy Lutomirski, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini,
	Christoph Hellwig


[-- Attachment #1.1: Type: text/plain, Size: 1498 bytes --]

On Thu, 2015-11-19 at 13:59 -0800, Andy Lutomirski wrote:
> 
> >
> > So thinking hard about it, I don't see any real drawbacks to making this
> > conditional on a new feature bit, that Xen can then set..
> 
> Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> virtio device is provided by QEMU, then how does Xen set the bit?
> Similarly, how would Xen set the bit for a real physical device?

Right. This is *not* a fundamental characteristic of the device. This
is all about how your *particular* hypervisor (in the set of turtles-
all-the-way-down) happened to expose the thing to you.

This is why it lives in the DMAR table, in the Intel world, which
*tells* you which devices are behind which IOMMU (and which are not).
And why I keep repeating myself that it has nothing to do with the
actual device or the virtio drivers.

I understand that POWER and other platforms don't currently have a
clean way to indicate that certain devices don't have translation. And I
understand that we may end up with a *quirk* which ensures that the DMA
API does the right thing (i.e. nothing) in certain cases.

But we should *NOT* be involving the virtio device drivers in that
quirk, in any way. And putting a feature bit in the virtio device
itself doesn't seem at all sane either.

Bear in mind that qemu-system-x86_64 currently has the *same* problem
with assigned physical devices. It's claiming they're translated, and
they're not.

-- 
dwmw2


[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-19 23:38       ` David Woodhouse
@ 2015-11-20  2:56         ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 110+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-20  2:56 UTC (permalink / raw)
  To: David Woodhouse, Andy Lutomirski, Michael S. Tsirkin
  Cc: Christian Borntraeger, Paolo Bonzini, linux-kernel,
	Martin Schwidefsky, Sebastian Ott, linux-s390, Cornelia Huck,
	Joerg Roedel, Linux Virtualization, Christoph Hellwig, KVM

On Thu, 2015-11-19 at 23:38 +0000, David Woodhouse wrote:
> 
> I understand that POWER and other platforms don't currently have a
> clean way to indicate that certain device don't have translation. And I
> understand that we may end up with a *quirk* which ensures that the DMA
> API does the right thing (i.e. nothing) in certain cases.
> 
> But we should *NOT* be involving the virtio device drivers in that
> quirk, in any way. And putting a feature bit in the virtio device
> itself doesn't seem at all sane either.
> 
> Bear in mind that qemu-system-x86_64 currently has the *same* problem
> with assigned physical devices. It's claiming they're translated, and
> they're not.

It's not that clear but yeah ... as I mentioned, I can't find a
way to do that quirk that won't break when we want to actually use
the iommu... 

Ben.


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-20  2:56         ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 110+ messages in thread
From: Benjamin Herrenschmidt @ 2015-11-20  2:56 UTC (permalink / raw)
  To: David Woodhouse, Andy Lutomirski, Michael S. Tsirkin
  Cc: linux-s390, KVM, Sebastian Ott, linux-kernel,
	Linux Virtualization, Christian Borntraeger, Joerg Roedel,
	Martin Schwidefsky, Paolo Bonzini, Christoph Hellwig

On Thu, 2015-11-19 at 23:38 +0000, David Woodhouse wrote:
> 
> I understand that POWER and other platforms don't currently have a
> clean way to indicate that certain device don't have translation. And I
> understand that we may end up with a *quirk* which ensures that the DMA
> API does the right thing (i.e. nothing) in certain cases.
> 
> But we should *NOT* be involving the virtio device drivers in that
> quirk, in any way. And putting a feature bit in the virtio device
> itself doesn't seem at all sane either.
> 
> Bear in mind that qemu-system-x86_64 currently has the *same* problem
> with assigned physical devices. It's claiming they're translated, and
> they're not.

It's not that clear but yeah ... as I mentioned, I can't find a
way to do that quirk that won't break when we want to actually use
the iommu... 

Ben.

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-19 21:59     ` Andy Lutomirski
@ 2015-11-20  6:56       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  6:56 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	linux-kernel, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, David Woodhouse, KVM

On Thu, Nov 19, 2015 at 01:59:05PM -0800, Andy Lutomirski wrote:
> On Nov 19, 2015 5:45 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >
> > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > and without Xen, and using both the modern 1.0 variant and the
> > > legacy variant.
> >
> > So thinking hard about it, I don't see any real drawbacks to making this
> > conditional on a new feature bit, that Xen can then set..
> 
> Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> virtio device is provided by QEMU, then how does Xen set the bit?

You would run QEMU with the appropriate flag. E.g.
-global virtio-pci,use_platform_dma=on

> Similarly, how would Xen set the bit for a real physical device?
> 
> 
> --Andy

There's no need to set bits for physical devices I think: from security
point of view, using them from a VM isn't very different from using them
from host.



-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-20  6:56       ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  6:56 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini, David Woodhouse,
	Christoph Hellwig

On Thu, Nov 19, 2015 at 01:59:05PM -0800, Andy Lutomirski wrote:
> On Nov 19, 2015 5:45 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> >
> > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > and without Xen, and using both the modern 1.0 variant and the
> > > legacy variant.
> >
> > So thinking hard about it, I don't see any real drawbacks to making this
> > conditional on a new feature bit, that Xen can then set..
> 
> Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> virtio device is provided by QEMU, then how does Xen set the bit?

You would run QEMU with the appropriate flag. E.g.
-global virtio-pci,use_platform_dma=on

> Similarly, how would Xen set the bit for a real physical device?
> 
> 
> --Andy

There's no need to set bits for physical devices I think: from security
point of view, using them from a VM isn't very different from using them
from host.



-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-20  6:56       ` Michael S. Tsirkin
@ 2015-11-20  7:47         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  7:47 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Benjamin Herrenschmidt, Christian Borntraeger, Paolo Bonzini,
	linux-kernel, Martin Schwidefsky, Sebastian Ott, linux-s390,
	Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, David Woodhouse, KVM

On Fri, Nov 20, 2015 at 08:56:46AM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 19, 2015 at 01:59:05PM -0800, Andy Lutomirski wrote:
> > On Nov 19, 2015 5:45 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >
> > > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > > and without Xen, and using both the modern 1.0 variant and the
> > > > legacy variant.
> > >
> > > So thinking hard about it, I don't see any real drawbacks to making this
> > > conditional on a new feature bit, that Xen can then set..
> > 
> > Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> > virtio device is provided by QEMU, then how does Xen set the bit?
> 
> You would run QEMU with the appropriate flag. E.g.
> -global virtio-pci,use_platform_dma=on

Or Xen code within QEMU can tweak this global internally
so users don't need to care.

> > Similarly, how would Xen set the bit for a real physical device?
> > 
> > 
> > --Andy
> 
> There's no need to set bits for physical devices I think: from security
> point of view, using them from a VM isn't very different from using them
> from host.
> 
> 
> 
> -- 
> MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-20  7:47         ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  7:47 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Linux Virtualization, Christian Borntraeger,
	Joerg Roedel, Martin Schwidefsky, Paolo Bonzini, David Woodhouse,
	Christoph Hellwig

On Fri, Nov 20, 2015 at 08:56:46AM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 19, 2015 at 01:59:05PM -0800, Andy Lutomirski wrote:
> > On Nov 19, 2015 5:45 AM, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > >
> > > On Tue, Oct 27, 2015 at 11:38:57PM -0700, Andy Lutomirski wrote:
> > > > This switches virtio to use the DMA API unconditionally.  I'm sure
> > > > it breaks things, but it seems to work on x86 using virtio-pci, with
> > > > and without Xen, and using both the modern 1.0 variant and the
> > > > legacy variant.
> > >
> > > So thinking hard about it, I don't see any real drawbacks to making this
> > > conditional on a new feature bit, that Xen can then set..
> > 
> > Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> > virtio device is provided by QEMU, then how does Xen set the bit?
> 
> You would run QEMU with the appropriate flag. E.g.
> -global virtio-pci,use_platform_dma=on

Or Xen code within QEMU can tweak this global internally
so users don't need to care.

> > Similarly, how would Xen set the bit for a real physical device?
> > 
> > 
> > --Andy
> 
> There's no need to set bits for physical devices I think: from security
> point of view, using them from a VM isn't very different from using them
> from host.
> 
> 
> 
> -- 
> MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-19 23:38       ` David Woodhouse
@ 2015-11-20  8:21         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  8:21 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andy Lutomirski, Benjamin Herrenschmidt, Christian Borntraeger,
	Paolo Bonzini, linux-kernel, Martin Schwidefsky, Sebastian Ott,
	linux-s390, Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, KVM, Marcel Apfelbaum

On Thu, Nov 19, 2015 at 11:38:06PM +0000, David Woodhouse wrote:
> On Thu, 2015-11-19 at 13:59 -0800, Andy Lutomirski wrote:
> > 
> > >
> > > So thinking hard about it, I don't see any real drawbacks to making this
> > > conditional on a new feature bit, that Xen can then set..
> > 
> > Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> > virtio device is provided by QEMU, then how does Xen set the bit?
> > Similarly, how would Xen set the bit for a real physical device?
> 
> Right. This is *not* a fundamental characteristic of the device. This
> is all about how your *particular* hypervisor (in the set of turtles-
> all-the-way-down) happened to expose the thing to you.
> 
> This is why it lives in the DMAR table, in the Intel world, which
> *tells* you which devices are behind which IOMMU (and which are not).

David, there are two things a hypervisor needs to tell the guest.
1. The actual device is behind an IOMMU. This is what you
   are suggesting we use DMAR for.
2. Using IOMMU from kernel (as opposed to from userspace with VFIO)
   actually adds security. For existing virtio devices on KVM,
   the answer is no. And DMAR has no way to reflect that.

Question 2 only makes sense if you answer yes to question 1 and if user
wants protection from malicious devices with iommu=on, and
if you care about getting good performance from *other*
devices.  And what guest would do is use 1:1 for the
devices where answer 2 is "no".

Maybe for now I should just give up and say "don't use iommu=on within
VMs if you want any performance".  But the point is, if we just fix QEMU
to actually obey IOMMU mappings for assigned devices, then there's
already a kind of answer with virtio being trusted since it's part of
hypervisor, all this without guest changes. Seems kind of sad to let
performance regress.

So a (yet another) feature bit would be a possible solution there, but
we don't seem to be able to even agree on using a feature bit for a
quirk.


> And why I keep repeating myself that it has nothing to do with the
> actual device or the virtio drivers.
>
> I understand that POWER and other platforms don't currently have a
> clean way to indicate that certain device don't have translation. And I
> understand that we may end up with a *quirk* which ensures that the DMA
> API does the right thing (i.e. nothing) in certain cases.

So assuming we forget about 2 above for now, then yes, all we need
is a quirk, using some logic to detect these systems.

> But we should *NOT* be involving the virtio device drivers in that
> quirk, in any way. And putting a feature bit in the virtio device
> itself doesn't seem at all sane either.

Only if there's some other device that benefits from all this work.  If
virtio is the only one that benefits, then why do we want to
spread the quirk rules around so much? A feature bit gives us
a single, portable rule that the quirk can use on all platforms.

> Bear in mind that qemu-system-x86_64 currently has the *same* problem
> with assigned physical devices. It's claiming they're translated, and
> they're not.
> 
> -- 
> dwmw2
> 

Presumably people either don't assign
devices or don't have an iommu otherwise things won't work for them,
but if they do have an iommu and don't assign devices, then Andy's
patch will break them.

This is not QEMU specific unfortunately, we don't know who
might have implemented virtio.





-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-20  8:21         ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  8:21 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, KVM, Marcel Apfelbaum, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, Christoph Hellwig

On Thu, Nov 19, 2015 at 11:38:06PM +0000, David Woodhouse wrote:
> On Thu, 2015-11-19 at 13:59 -0800, Andy Lutomirski wrote:
> > 
> > >
> > > So thinking hard about it, I don't see any real drawbacks to making this
> > > conditional on a new feature bit, that Xen can then set..
> > 
> > Can you elaborate?  If I run QEMU, hosting Xen, hosting Linux, and the
> > virtio device is provided by QEMU, then how does Xen set the bit?
> > Similarly, how would Xen set the bit for a real physical device?
> 
> Right. This is *not* a fundamental characteristic of the device. This
> is all about how your *particular* hypervisor (in the set of turtles-
> all-the-way-down) happened to expose the thing to you.
> 
> This is why it lives in the DMAR table, in the Intel world, which
> *tells* you which devices are behind which IOMMU (and which are not).

David, there are two things a hypervisor needs to tell the guest.
1. The actual device is behind an IOMMU. This is what you
   are suggesting we use DMAR for.
2. Using IOMMU from kernel (as opposed to from userspace with VFIO)
   actually adds security. For existing virtio devices on KVM,
   the answer is no. And DMAR has no way to reflect that.

Question 2 only makes sense if you answer yes to question 1 and if user
wants protection from malicious devices with iommu=on, and
if you care about getting good performance from *other*
devices.  And what guest would do is use 1:1 for the
devices where answer 2 is "no".

Maybe for now I should just give up and say "don't use iommu=on within
VMs if you want any performance".  But the point is, if we just fix QEMU
to actually obey IOMMU mappings for assigned devices, then there's
already a kind of answer with virtio being trusted since it's part of
hypervisor, all this without guest changes. Seems kind of sad to let
performance regress.

So a (yet another) feature bit would be a possible solution there, but
we don't seem to be able to even agree on using a feature bit for a
quirk.


> And why I keep repeating myself that it has nothing to do with the
> actual device or the virtio drivers.
>
> I understand that POWER and other platforms don't currently have a
> clean way to indicate that certain device don't have translation. And I
> understand that we may end up with a *quirk* which ensures that the DMA
> API does the right thing (i.e. nothing) in certain cases.

So assuming we forget about 2 above for now, then yes, all we need
is a quirk, using some logic to detect these systems.

> But we should *NOT* be involving the virtio device drivers in that
> quirk, in any way. And putting a feature bit in the virtio device
> itself doesn't seem at all sane either.

Only if there's some other device that benefits from all this work.  If
virtio is the only one that benefits, then why do we want to
spread the quirk rules around so much? A feature bit gives us
a single, portable rule that the quirk can use on all platforms.

> Bear in mind that qemu-system-x86_64 currently has the *same* problem
> with assigned physical devices. It's claiming they're translated, and
> they're not.
> 
> -- 
> dwmw2
> 

Presumably people either don't assign
devices or don't have an iommu otherwise things won't work for them,
but if they do have an iommu and don't assign devices, then Andy's
patch will break them.

This is not QEMU specific unfortunately, we don't know who
might have implemented virtio.





-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-20  2:56         ` Benjamin Herrenschmidt
  (?)
  (?)
@ 2015-11-20  8:34         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  8:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: David Woodhouse, Andy Lutomirski, Christian Borntraeger,
	Paolo Bonzini, linux-kernel, Martin Schwidefsky, Sebastian Ott,
	linux-s390, Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, KVM

On Fri, Nov 20, 2015 at 01:56:39PM +1100, Benjamin Herrenschmidt wrote:
> On Thu, 2015-11-19 at 23:38 +0000, David Woodhouse wrote:
> > 
> > I understand that POWER and other platforms don't currently have a
> > clean way to indicate that certain device don't have translation. And I
> > understand that we may end up with a *quirk* which ensures that the DMA
> > API does the right thing (i.e. nothing) in certain cases.
> > 
> > But we should *NOT* be involving the virtio device drivers in that
> > quirk, in any way. And putting a feature bit in the virtio device
> > itself doesn't seem at all sane either.
> > 
> > Bear in mind that qemu-system-x86_64 currently has the *same* problem
> > with assigned physical devices. It's claiming they're translated, and
> > they're not.
> 
> It's not that clear but yeah ... as I mentioned, I can't find a
> way to do that quirk that won't break when we want to actually use
> the iommu... 
> 
> Ben.

Yes, I am not at all sure we need a quirk for assigned devices.
Better teach QEMU to make iommu work for them.


-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-20  2:56         ` Benjamin Herrenschmidt
  (?)
@ 2015-11-20  8:34         ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-20  8:34 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-s390, KVM, Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, David Woodhouse,
	Christoph Hellwig

On Fri, Nov 20, 2015 at 01:56:39PM +1100, Benjamin Herrenschmidt wrote:
> On Thu, 2015-11-19 at 23:38 +0000, David Woodhouse wrote:
> > 
> > I understand that POWER and other platforms don't currently have a
> > clean way to indicate that certain device don't have translation. And I
> > understand that we may end up with a *quirk* which ensures that the DMA
> > API does the right thing (i.e. nothing) in certain cases.
> > 
> > But we should *NOT* be involving the virtio device drivers in that
> > quirk, in any way. And putting a feature bit in the virtio device
> > itself doesn't seem at all sane either.
> > 
> > Bear in mind that qemu-system-x86_64 currently has the *same* problem
> > with assigned physical devices. It's claiming they're translated, and
> > they're not.
> 
> It's not that clear but yeah ... as I mentioned, I can't find a
> way to do that quirk that won't break when we want to actually use
> the iommu... 
> 
> Ben.

Yes, I am not at all sure we need a quirk for assigned devices.
Better teach QEMU to make iommu work for them.


-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-08 11:49                               ` Joerg Roedel
@ 2015-11-22 13:06                                 ` Marcel Apfelbaum
  -1 siblings, 0 replies; 110+ messages in thread
From: Marcel Apfelbaum @ 2015-11-22 13:06 UTC (permalink / raw)
  To: Joerg Roedel, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	David Woodhouse, Christoph Hellwig, Martin Schwidefsky

On 11/08/2015 01:49 PM, Joerg Roedel wrote:
> On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
>> I have no problem with that. For example, can we teach
>> the DMA API on intel x86 to use PT for virtio by default?
>> That would allow merging Andy's patches with
>> full compatibility with old guests and hosts.
>
> Well, the only incompatibility comes from an experimental qemu feature,
> more explicitly from a bug in that features implementation. So why
> should we work around that in the kernel? I think it is not too hard to
> fix qemu to generate a correct DMAR table which excludes the virtio
> devices from iommu translation.

Hi,

I tried to generate a DMAR table that excludes some devices from
IOMMU translation, however it does not help.

The reason is, as far as I understand, that Linux kernel does
not allow any device being outside an IOMMU scope if the
iommu kernel option is activated.

Does anybody know if it is "by design" or is simply an uncommon configuration?
(some devices in an IOMMU scope, while others outside *any* IOMMU scope)

Thanks,
Marcel

>
>
> 	Joerg
>
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-22 13:06                                 ` Marcel Apfelbaum
  0 siblings, 0 replies; 110+ messages in thread
From: Marcel Apfelbaum @ 2015-11-22 13:06 UTC (permalink / raw)
  To: Joerg Roedel, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Martin Schwidefsky, Paolo Bonzini,
	Linux Virtualization, David Woodhouse, Christoph Hellwig

On 11/08/2015 01:49 PM, Joerg Roedel wrote:
> On Sun, Nov 08, 2015 at 12:37:47PM +0200, Michael S. Tsirkin wrote:
>> I have no problem with that. For example, can we teach
>> the DMA API on intel x86 to use PT for virtio by default?
>> That would allow merging Andy's patches with
>> full compatibility with old guests and hosts.
>
> Well, the only incompatibility comes from an experimental qemu feature,
> more explicitly from a bug in that feature's implementation. So why
> should we work around that in the kernel? I think it is not too hard to
> fix qemu to generate a correct DMAR table which excludes the virtio
> devices from iommu translation.

Hi,

I tried to generate a DMAR table that excludes some devices from
IOMMU translation, however it does not help.

The reason is, as far as I understand, that Linux kernel does
not allow any device being outside an IOMMU scope if the
iommu kernel option is activated.

Does anybody know if it is "by design" or is simply an uncommon configuration?
(some devices in an IOMMU scope, while others outside *any* IOMMU scope)

Thanks,
Marcel

>
>
> 	Joerg
>
> _______________________________________________
> Virtualization mailing list
> Virtualization@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization
>

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 13:06                                 ` Marcel Apfelbaum
@ 2015-11-22 15:54                                   ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 15:54 UTC (permalink / raw)
  To: marcel, Joerg Roedel, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	Christoph Hellwig, Martin Schwidefsky

[-- Attachment #1: Type: text/plain, Size: 785 bytes --]

On Sun, 2015-11-22 at 15:06 +0200, Marcel Apfelbaum wrote:
> 
> 
> I tried to generate a DMAR table that excludes some devices from
> IOMMU translation, however it does not help.
> 
> The reason is, as far as I understand, that Linux kernel does
> not allow any device being outside an IOMMU scope if the
> iommu kernel option is activated.
> 
> Does anybody know if it is "by design" or is simply an uncommon
> configuration?
> (some devices in an IOMMU scope, while others outside *any* IOMMU
> scope)

That's a kernel bug in the way it handles per-device DMA operations. Or
more to the point, in the way it doesn't — the non-translated devices
end up being pointed to the intel_dma_ops despite the fact they
shouldn't be. I'm working on that...

-- 
dwmw2


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-22 15:54                                   ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 15:54 UTC (permalink / raw)
  To: marcel, Joerg Roedel, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Martin Schwidefsky, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig


[-- Attachment #1.1: Type: text/plain, Size: 785 bytes --]

On Sun, 2015-11-22 at 15:06 +0200, Marcel Apfelbaum wrote:
> 
> 
> I tried to generate a DMAR table that excludes some devices from
> IOMMU translation, however it does not help.
> 
> The reason is, as far as I understand, that Linux kernel does
> not allow any device being outside an IOMMU scope if the
> iommu kernel option is activated.
> 
> Does anybody know if it is "by design" or is simply an uncommon
> configuration?
> (some devices in an IOMMU scope, while others outside *any* IOMMU
> scope)

That's a kernel bug in the way it handles per-device DMA operations. Or
more to the point, in the way it doesn't — the non-translated devices
end up being pointed to the intel_dma_ops despite the fact they
shouldn't be. I'm working on that...

-- 
dwmw2


[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-20  8:21         ` Michael S. Tsirkin
  (?)
  (?)
@ 2015-11-22 15:58         ` David Woodhouse
  2015-11-22 21:52           ` Michael S. Tsirkin
  2015-11-22 21:52           ` Michael S. Tsirkin
  -1 siblings, 2 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 15:58 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Andy Lutomirski, Benjamin Herrenschmidt, Christian Borntraeger,
	Paolo Bonzini, linux-kernel, Martin Schwidefsky, Sebastian Ott,
	linux-s390, Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, KVM, Marcel Apfelbaum

[-- Attachment #1: Type: text/plain, Size: 1119 bytes --]

On Fri, 2015-11-20 at 10:21 +0200, Michael S. Tsirkin wrote:
> 
> David, there are two things a hypervisor needs to tell the guest.
> 1. The actual device is behind an IOMMU. This is what you
>    are suggesting we use DMAR for.
> 2. Using IOMMU from kernel (as opposed to from userspace with VFIO)
>    actually adds security. For exising virtio devices on KVM,
>    the answer is no. And DMAR has no way to reflect that.

Using the IOMMU from the kernel *always* adds security. It protects
against device driver (and device) bugs which can be made exploitable
by allowing DMA to anywhere in the system.

Sure, there are classes of that which are far more interesting, for
example where you give the whole device to a guest and let it load the
firmware. But "we trust the hypervisor" and "we trust the hardware" are
not *so* far apart conceptually.

Hell, with ATS you *still* have to trust the hardware to a large
extent.

I really think that something like the proposed DMA_ATTR_IOMMU_BYPASS
should suffice for the "who cares about security; we want performance"
case.

-- 
dwmw2


[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-20  8:21         ` Michael S. Tsirkin
  (?)
@ 2015-11-22 15:58         ` David Woodhouse
  -1 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 15:58 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Marcel Apfelbaum, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, Christoph Hellwig


[-- Attachment #1.1: Type: text/plain, Size: 1119 bytes --]

On Fri, 2015-11-20 at 10:21 +0200, Michael S. Tsirkin wrote:
> 
> David, there are two things a hypervisor needs to tell the guest.
> 1. The actual device is behind an IOMMU. This is what you
>    are suggesting we use DMAR for.
> 2. Using IOMMU from kernel (as opposed to from userspace with VFIO)
>    actually adds security. For existing virtio devices on KVM,
>    the answer is no. And DMAR has no way to reflect that.

Using the IOMMU from the kernel *always* adds security. It protects
against device driver (and device) bugs which can be made exploitable
by allowing DMA to anywhere in the system.

Sure, there are classes of that which are far more interesting, for
example where you give the whole device to a guest and let it load the
firmware. But "we trust the hypervisor" and "we trust the hardware" are
not *so* far apart conceptually.

Hell, with ATS you *still* have to trust the hardware to a large
extent.

I really think that something like the proposed DMA_ATTR_IOMMU_BYPASS
should suffice for the "who cares about security; we want performance"
case.

-- 
dwmw2


[-- Attachment #1.2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 5691 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 15:54                                   ` David Woodhouse
  (?)
@ 2015-11-22 17:04                                   ` Marcel Apfelbaum
  -1 siblings, 0 replies; 110+ messages in thread
From: Marcel Apfelbaum @ 2015-11-22 17:04 UTC (permalink / raw)
  To: David Woodhouse, Joerg Roedel, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, Linux Virtualization,
	Christoph Hellwig, Martin Schwidefsky

On 11/22/2015 05:54 PM, David Woodhouse wrote:
> On Sun, 2015-11-22 at 15:06 +0200, Marcel Apfelbaum wrote:
>>
>>
>> I tried to generate a DMAR table that excludes some devices from
>> IOMMU translation, however it does not help.
>>
>> The reason is, as far as I understand, that Linux kernel does
>> not allow any device being outside an IOMMU scope if the
>> iommu kernel option is activated.
>>
>> Does anybody know if it is "by design" or is simply an uncommon
>> configuration?
>> (some devices in an IOMMU scope, while others outside *any* IOMMU
>> scope)
>
> That's a kernel bug in the way it handles per-device DMA operations. Or
> more to the point, in the way it doesn't — the non-translated devices
> end up being pointed to the intel_dma_ops despite the fact they
> shouldn't be. I'm working on that...
>

Hi David,
Thank you for the fast response.

Sadly I am not familiar with the DMA/IOMMU code to contribute
with a sane idea, but I'll gladly test it.
If you lack the time and have an idea to share, I can give it a try though.

Thanks,
Marcel



^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 15:54                                   ` David Woodhouse
  (?)
  (?)
@ 2015-11-22 17:04                                   ` Marcel Apfelbaum
  -1 siblings, 0 replies; 110+ messages in thread
From: Marcel Apfelbaum @ 2015-11-22 17:04 UTC (permalink / raw)
  To: David Woodhouse, Joerg Roedel, Michael S. Tsirkin
  Cc: linux-s390, KVM, Benjamin Herrenschmidt, Sebastian Ott,
	linux-kernel, Andy Lutomirski, Christian Borntraeger,
	Andy Lutomirski, Martin Schwidefsky, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig

On 11/22/2015 05:54 PM, David Woodhouse wrote:
> On Sun, 2015-11-22 at 15:06 +0200, Marcel Apfelbaum wrote:
>>
>>
>> I tried to generate a DMAR table that excludes some devices from
>> IOMMU translation, however it does not help.
>>
>> The reason is, as far as I understand, that Linux kernel does
>> not allow any device being outside an IOMMU scope if the
>> iommu kernel option is activated.
>>
>> Does anybody know if it is "by design" or is simply an uncommon
>> configuration?
>> (some devices in an IOMMU scope, while others outside *any* IOMMU
>> scope)
>
> That's a kernel bug in the way it handles per-device DMA operations. Or
> more to the point, in the way it doesn't — the non-translated devices
> end up being pointed to the intel_dma_ops despite the fact they
> shouldn't be. I'm working on that...
>

Hi David,
Thank you for the fast response.

Sadly I am not familiar with the DMA/IOMMU code to contribute
with a sane idea, but I'll gladly test it.
If you lack the time and have an idea to share, I can give it a try though.

Thanks,
Marcel


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 15:58         ` David Woodhouse
  2015-11-22 21:52           ` Michael S. Tsirkin
@ 2015-11-22 21:52           ` Michael S. Tsirkin
  2015-11-22 22:21               ` David Woodhouse
  2015-11-22 22:21               ` David Woodhouse
  1 sibling, 2 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-22 21:52 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andy Lutomirski, Benjamin Herrenschmidt, Christian Borntraeger,
	Paolo Bonzini, linux-kernel, Martin Schwidefsky, Sebastian Ott,
	linux-s390, Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, KVM, Marcel Apfelbaum

On Sun, Nov 22, 2015 at 03:58:28PM +0000, David Woodhouse wrote:
> On Fri, 2015-11-20 at 10:21 +0200, Michael S. Tsirkin wrote:
> > 
> > David, there are two things a hypervisor needs to tell the guest.
> > 1. The actual device is behind an IOMMU. This is what you
> >    are suggesting we use DMAR for.
> > 2. Using IOMMU from kernel (as opposed to from userspace with VFIO)
> >    actually adds security. For existing virtio devices on KVM,
> >    the answer is no. And DMAR has no way to reflect that.
> 
> Using the IOMMU from the kernel *always* adds security. It protects
> against device driver (and device) bugs which can be made exploitable
> by allowing DMA to anywhere in the system.

No - speaking about QEMU/KVM here - you are not "allowing" DMA - by
programming the virtual IOMMU you are asking the hypervisor nicely to do
that. If it's buggy, it can ignore you and there's nothing you can do.

As with any random change in the system, some bugs might get masked and
become non-exploitable, but then some other bugs might surface and
become exploitable.

I gather that e.g. Xen is different.


> Sure, there are classes of that which are far more interesting, for
> example where you give the whole device to a guest and let it load the
> firmware. But "we trust the hypervisor" and "we trust the hardware" are
> not *so* far apart conceptually.

Depends on the hypervisor I guess. At least for QEMU/KVM, one conceptual
difference is that we actually could have the hypervisor tell us whether
a specific device has to be trusted, or can be protected against, and
user can actually read the code and verify that QEMU is doing the right
thing.

Hardware is closed source so harder to trust.

> Hell, with ATS you *still* have to trust the hardware to a large
> extent.
>
> I really think that something like the proposed DMA_ATTR_IOMMU_BYPASS
> should suffice

I'm not sure how that is supposed to be used - does
the driver request DMA_ATTR_IOMMU_BYPASS at setup time?

If yes then I think that will work for virtio -
we can just set that in the driver.

> for the "who cares about security; we want performance"
> case.
> 
> -- 
> dwmw2
> 

There's that, and there's an "I care about security, but
do not want to burn up cycles on fake protections that
do not work" case.


-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 15:58         ` David Woodhouse
@ 2015-11-22 21:52           ` Michael S. Tsirkin
  2015-11-22 21:52           ` Michael S. Tsirkin
  1 sibling, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-22 21:52 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, KVM, Marcel Apfelbaum, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, Christoph Hellwig

On Sun, Nov 22, 2015 at 03:58:28PM +0000, David Woodhouse wrote:
> On Fri, 2015-11-20 at 10:21 +0200, Michael S. Tsirkin wrote:
> > 
> > David, there are two things a hypervisor needs to tell the guest.
> > 1. The actual device is behind an IOMMU. This is what you
> >    are suggesting we use DMAR for.
> > 2. Using IOMMU from kernel (as opposed to from userspace with VFIO)
> >    actually adds security. For existing virtio devices on KVM,
> >    the answer is no. And DMAR has no way to reflect that.
> 
> Using the IOMMU from the kernel *always* adds security. It protects
> against device driver (and device) bugs which can be made exploitable
> by allowing DMA to anywhere in the system.

No - speaking about QEMU/KVM here - you are not "allowing" DMA - by
programming the virtual IOMMU you are asking the hypervisor nicely to do
that. If it's buggy, it can ignore you and there's nothing you can do.

As with any random change in the system, some bugs might get masked and
become non-exploitable, but then some other bugs might surface and
become exploitable.

I gather that e.g. Xen is different.


> Sure, there are classes of that which are far more interesting, for
> example where you give the whole device to a guest and let it load the
> firmware. But "we trust the hypervisor" and "we trust the hardware" are
> not *so* far apart conceptually.

Depends on the hypervisor I guess. At least for QEMU/KVM, one conceptual
difference is that we actually could have the hypervisor tell us whether
a specific device has to be trusted, or can be protected against, and
user can actually read the code and verify that QEMU is doing the right
thing.

Hardware is closed source so harder to trust.

> Hell, with ATS you *still* have to trust the hardware to a large
> extent.
>
> I really think that something like the proposed DMA_ATTR_IOMMU_BYPASS
> should suffice

I'm not sure how that is supposed to be used - does
the driver request DMA_ATTR_IOMMU_BYPASS at setup time?

If yes then I think that will work for virtio -
we can just set that in the driver.

> for the "who cares about security; we want performance"
> case.
> 
> -- 
> dwmw2
> 

There's that, and there's an "I care about security, but
do not want to burn up cycles on fake protections that
do not work" case.


-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 15:54                                   ` David Woodhouse
@ 2015-11-22 22:11                                     ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-22 22:11 UTC (permalink / raw)
  To: David Woodhouse
  Cc: marcel, Joerg Roedel, linux-s390, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Andy Lutomirski, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig, Martin Schwidefsky

On Sun, Nov 22, 2015 at 03:54:21PM +0000, David Woodhouse wrote:
> On Sun, 2015-11-22 at 15:06 +0200, Marcel Apfelbaum wrote:
> > 
> > 
> > I tried to generate a DMAR table that excludes some devices from
> > IOMMU translation, however it does not help.
> > 
> > The reason is, as far as I understand, that Linux kernel does
> > not allow any device being outside an IOMMU scope if the
> > iommu kernel option is activated.
> > 
> > Does anybody know if it is "by design" or is simply an uncommon
> > configuration?
> > (some devices in an IOMMU scope, while others outside *any* IOMMU
> > scope)
> 
> That's a kernel bug in the way it handles per-device DMA operations. Or
> more to the point, in the way it doesn't — the non-translated devices
> end up being pointed to the intel_dma_ops despite the fact they
> shouldn't be. I'm working on that...
> 
> -- 
> dwmw2
> 

Interesting. This seems to imply such configurations aren't
common, so I wonder whether other guest OS-es treat them
correctly.

If many of them are, we probably shouldn't use this in QEMU:
we care about guests actually working :)

-- 
MST

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-22 22:11                                     ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-22 22:11 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, Joerg Roedel, KVM, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Andy Lutomirski, marcel, Paolo Bonzini,
	Linux Virtualization, Christoph Hellwig, Martin Schwidefsky

On Sun, Nov 22, 2015 at 03:54:21PM +0000, David Woodhouse wrote:
> On Sun, 2015-11-22 at 15:06 +0200, Marcel Apfelbaum wrote:
> > 
> > 
> > I tried to generate a DMAR table that excludes some devices from
> > IOMMU translation, however it does not help.
> > 
> > The reason is, as far as I understand, that Linux kernel does
> > not allow any device being outside an IOMMU scope if the
> > iommu kernel option is activated.
> > 
> > Does anybody know if it is "by design" or is simply an uncommon
> > configuration?
> > (some devices in an IOMMU scope, while others outside *any* IOMMU
> > scope)
> 
> That's a kernel bug in the way it handles per-device DMA operations. Or
> more to the point, in the way it doesn't — the non-translated devices
> end up being pointed to the intel_dma_ops despite the fact they
> shouldn't be. I'm working on that...
> 
> -- 
> dwmw2
> 

Interesting. This seems to imply such configurations aren't
common, so I wonder whether other guest OS-es treat them
correctly.

If many of them are, we probably shouldn't use this in QEMU:
we care about guests actually working :)

-- 
MST
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 21:52           ` Michael S. Tsirkin
@ 2015-11-22 22:21               ` David Woodhouse
  2015-11-22 22:21               ` David Woodhouse
  1 sibling, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 22:21 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: David Woodhouse, Andy Lutomirski, Benjamin Herrenschmidt,
	Christian Borntraeger, Paolo Bonzini, linux-kernel,
	Martin Schwidefsky, Sebastian Ott, linux-s390, Cornelia Huck,
	Joerg Roedel, Linux Virtualization, Christoph Hellwig, KVM,
	Marcel Apfelbaum



> There's that, and there's an "I care about security, but
> do not want to burn up cycles on fake protections that
> do not work" case.

It would seem to make most sense for this use case simply *not* to expose
virtio devices to guests as being behind an IOMMU at all. Sure, there are
esoteric use cases where the guest actually nests and runs further guests
inside itself and wants to pass through the virtio devices from the real
hardware host. But presumably those configurations will have multiple
virtio devices assigned by the host anyway, and further tweaking the
configuration to put them behind an IOMMU shouldn't be hard.


-- 
dwmw2


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-22 22:21               ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 22:21 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Marcel Apfelbaum, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, David Woodhouse,
	Christoph Hellwig



> There's that, and there's an "I care about security, but
> do not want to burn up cycles on fake protections that
> do not work" case.

It would seem to make most sense for this use case simply *not* to expose
virtio devices to guests as being behind an IOMMU at all. Sure, there are
esoteric use cases where the guest actually nests and runs further guests
inside itself and wants to pass through the virtio devices from the real
hardware host. But presumably those configurations will have multiple
virtio devices assigned by the host anyway, and further tweaking the
configuration to put them behind an IOMMU shouldn't be hard.


-- 
dwmw2

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 21:52           ` Michael S. Tsirkin
@ 2015-11-22 22:21               ` David Woodhouse
  2015-11-22 22:21               ` David Woodhouse
  1 sibling, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 22:21 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: David Woodhouse, Andy Lutomirski, Benjamin Herrenschmidt,
	Christian Borntraeger, Paolo Bonzini, linux-kernel,
	Martin Schwidefsky, Sebastian Ott, linux-s390, Cornelia Huck,
	Joerg Roedel, Linux Virtualization, Christoph Hellwig, KVM,
	Marcel Apfelbaum



> There's that, and there's an "I care about security, but
> do not want to burn up cycles on fake protections that
> do not work" case.

It would seem to make most sense for this use case simply *not* to expose
virtio devices to guests as being behind an IOMMU at all. Sure, there are
esoteric use cases where the guest actually nests and runs further guests
inside itself and wants to pass through the virtio devices from the real
hardware host. But presumably those configurations will have multiple
virtio devices assigned by the host anyway,  and further tweaking the
configuration to put them behind an IOMMU shouldn't be hard.


-- 
dwmw2


^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-22 22:21               ` David Woodhouse
  0 siblings, 0 replies; 110+ messages in thread
From: David Woodhouse @ 2015-11-22 22:21 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-s390, KVM, Marcel Apfelbaum, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, David Woodhouse,
	Christoph Hellwig



> There's that, and there's an "I care about security, but
> do not want to burn up cycles on fake protections that
> do not work" case.

It would seem to make most sense for this use case simply *not* to expose
virtio devices to guests as being behind an IOMMU at all. Sure, there are
esoteric use cases where the guest actually nests and runs further guests
inside itself and wants to pass through the virtio devices from the real
hardware host. But presumably those configurations will have multiple
virtio devices assigned by the host anyway,  and further tweaking the
configuration to put them behind an IOMMU shouldn't be hard.


-- 
dwmw2

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
  2015-11-22 22:21               ` David Woodhouse
@ 2015-11-23  7:56                 ` Michael S. Tsirkin
  -1 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-23  7:56 UTC (permalink / raw)
  To: David Woodhouse
  Cc: Andy Lutomirski, Benjamin Herrenschmidt, Christian Borntraeger,
	Paolo Bonzini, linux-kernel, Martin Schwidefsky, Sebastian Ott,
	linux-s390, Cornelia Huck, Joerg Roedel, Linux Virtualization,
	Christoph Hellwig, KVM, Marcel Apfelbaum

On Sun, Nov 22, 2015 at 10:21:34PM -0000, David Woodhouse wrote:
> 
> 
> > There's that, and there's an "I care about security, but
> > do not want to burn up cycles on fake protections that
> > do not work" case.
> 
> It would seem to make most sense for this use case simply *not* to expose
> virtio devices to guests as being behind an IOMMU at all. Sure, there are
> esoteric use cases where the guest actually nests and runs further guests
> inside itself and wants to pass through the virtio devices from the real
> hardware host. But presumably those configurations will have multiple
> virtio devices assigned by the host anyway,  and further tweaking the
> configuration to put them behind an IOMMU shouldn't be hard.

Unfortunately it's a no-go: this breaks the much less esoteric use case
of DPDK: using virtio devices with userspace drivers.

Well - not breaks as such as this doesn't currently work,
but this approach would prevent us from making it work.

> 
> -- 
> dwmw2

^ permalink raw reply	[flat|nested] 110+ messages in thread

* Re: [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-11-23  7:56                 ` Michael S. Tsirkin
  0 siblings, 0 replies; 110+ messages in thread
From: Michael S. Tsirkin @ 2015-11-23  7:56 UTC (permalink / raw)
  To: David Woodhouse
  Cc: linux-s390, KVM, Marcel Apfelbaum, Benjamin Herrenschmidt,
	Sebastian Ott, linux-kernel, Andy Lutomirski,
	Christian Borntraeger, Joerg Roedel, Martin Schwidefsky,
	Paolo Bonzini, Linux Virtualization, Christoph Hellwig

On Sun, Nov 22, 2015 at 10:21:34PM -0000, David Woodhouse wrote:
> 
> 
> > There's that, and there's an "I care about security, but
> > do not want to burn up cycles on fake protections that
> > do not work" case.
> 
> It would seem to make most sense for this use case simply *not* to expose
> virtio devices to guests as being behind an IOMMU at all. Sure, there are
> esoteric use cases where the guest actually nests and runs further guests
> inside itself and wants to pass through the virtio devices from the real
> hardware host. But presumably those configurations will have multiple
> virtio devices assigned by the host anyway,  and further tweaking the
> configuration to put them behind an IOMMU shouldn't be hard.

Unfortunately it's a no-go: this breaks the much less esoteric use case
of DPDK: using virtio devices with userspace drivers.

Well - not breaks as such as this doesn't currently work,
but this approach would prevent us from making it work.

> 
> -- 
> dwmw2

^ permalink raw reply	[flat|nested] 110+ messages in thread

* [PATCH v3 0/3] virtio DMA API core stuff
@ 2015-10-28  6:38 Andy Lutomirski
  0 siblings, 0 replies; 110+ messages in thread
From: Andy Lutomirski @ 2015-10-28  6:38 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-s390, Joerg Roedel, KVM, Michael S. Tsirkin, benh,
	Sebastian Ott, virtualization, Christian Borntraeger,
	Andy Lutomirski, Paolo Bonzini, dwmw2, Christoph Hellwig,
	Martin Schwidefsky

This switches virtio to use the DMA API unconditionally.  I'm sure
it breaks things, but it seems to work on x86 using virtio-pci, with
and without Xen, and using both the modern 1.0 variant and the
legacy variant.

Changes from v2:
 - Fix really embarrassing bug.  This version actually works.

Changes from v1:
 - Fix an endian conversion error causing a BUG to hit.
 - Fix a DMA ordering issue (swiotlb=force works now).
 - Minor cleanups.

Andy Lutomirski (3):
  virtio_net: Stop doing DMA from the stack
  virtio_ring: Support DMA APIs
  virtio_pci: Use the DMA API

 drivers/net/virtio_net.c           |  53 +++++++----
 drivers/virtio/Kconfig             |   2 +-
 drivers/virtio/virtio_pci_common.h |   3 +-
 drivers/virtio/virtio_pci_legacy.c |  19 +++-
 drivers/virtio/virtio_pci_modern.c |  34 +++++--
 drivers/virtio/virtio_ring.c       | 187 ++++++++++++++++++++++++++++++-------
 tools/virtio/linux/dma-mapping.h   |  17 ++++
 7 files changed, 246 insertions(+), 69 deletions(-)
 create mode 100644 tools/virtio/linux/dma-mapping.h

-- 
2.4.3

^ permalink raw reply	[flat|nested] 110+ messages in thread

end of thread, other threads:[~2015-11-23  7:57 UTC | newest]

Thread overview: 110+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-28  6:38 [PATCH v3 0/3] virtio DMA API core stuff Andy Lutomirski
2015-10-28  6:38 ` [PATCH v3 1/3] virtio_net: Stop doing DMA from the stack Andy Lutomirski
2015-10-28  7:08   ` Michael S. Tsirkin
2015-10-28  7:08   ` Michael S. Tsirkin
2015-10-28  6:38 ` Andy Lutomirski
2015-10-28  6:38 ` [PATCH v3 2/3] virtio_ring: Support DMA APIs Andy Lutomirski
2015-10-28  6:38 ` Andy Lutomirski
2015-10-28  6:39 ` [PATCH v3 3/3] virtio_pci: Use the DMA API Andy Lutomirski
2015-10-28  6:39 ` Andy Lutomirski
2015-10-28  6:53 ` [PATCH v3 0/3] virtio DMA API core stuff David Woodhouse
2015-10-28  6:53   ` David Woodhouse
2015-10-28  7:09   ` Andy Lutomirski
2015-10-28  7:09   ` Andy Lutomirski
2015-10-28  7:17 ` Michael S. Tsirkin
2015-10-28  7:17   ` Michael S. Tsirkin
2015-10-28  7:40   ` Christian Borntraeger
2015-10-28  7:40     ` Christian Borntraeger
2015-10-28  8:09     ` David Woodhouse
2015-10-28  8:09       ` David Woodhouse
2015-10-28 11:35       ` Michael S. Tsirkin
2015-10-28 11:35         ` Michael S. Tsirkin
2015-10-28 13:35         ` David Woodhouse
2015-10-28 13:35           ` David Woodhouse
2015-10-28 14:05           ` Michael S. Tsirkin
2015-10-28 14:05             ` Michael S. Tsirkin
2015-10-28 14:13             ` David Woodhouse
2015-10-28 14:13               ` David Woodhouse
2015-10-28 14:22               ` Michael S. Tsirkin
2015-10-28 14:22                 ` Michael S. Tsirkin
2015-10-28 14:32                 ` David Woodhouse
2015-10-28 14:32                   ` David Woodhouse
2015-10-28 16:12                   ` Michael S. Tsirkin
2015-10-28 22:51                     ` Andy Lutomirski
2015-10-28 22:51                       ` Andy Lutomirski
2015-10-29  9:01                       ` Michael S. Tsirkin
2015-10-29  9:01                         ` Michael S. Tsirkin
2015-10-29 16:18                         ` David Woodhouse
2015-10-29 16:18                           ` David Woodhouse
2015-11-08 10:37                           ` Michael S. Tsirkin
2015-11-08 10:37                             ` Michael S. Tsirkin
2015-11-08 11:49                             ` Joerg Roedel
2015-11-08 11:49                               ` Joerg Roedel
2015-11-10 15:02                               ` Michael S. Tsirkin
2015-11-10 15:02                                 ` Michael S. Tsirkin
2015-11-10 18:54                                 ` Andy Lutomirski
2015-11-10 18:54                                   ` Andy Lutomirski
2015-11-11 10:05                                   ` Michael S. Tsirkin
2015-11-11 10:05                                     ` Michael S. Tsirkin
2015-11-11 15:56                                     ` Andy Lutomirski
2015-11-11 22:30                                       ` David Woodhouse
2015-11-11 22:30                                       ` David Woodhouse
2015-11-12 11:09                                         ` Michael S. Tsirkin
2015-11-12 11:09                                           ` Michael S. Tsirkin
2015-11-12 12:18                                           ` David Woodhouse
2015-11-12 12:18                                             ` David Woodhouse
2015-11-11 15:56                                     ` Andy Lutomirski
2015-11-22 13:06                               ` Marcel Apfelbaum
2015-11-22 13:06                                 ` Marcel Apfelbaum
2015-11-22 15:54                                 ` David Woodhouse
2015-11-22 15:54                                   ` David Woodhouse
2015-11-22 17:04                                   ` Marcel Apfelbaum
2015-11-22 17:04                                   ` Marcel Apfelbaum
2015-11-22 22:11                                   ` Michael S. Tsirkin
2015-11-22 22:11                                     ` Michael S. Tsirkin
2015-11-08 12:00                             ` David Woodhouse
2015-11-08 12:00                               ` David Woodhouse
2015-10-30 15:16                         ` Joerg Roedel
2015-10-30 15:16                         ` Joerg Roedel
2015-11-11  9:11                           ` Michael S. Tsirkin
2015-11-11  9:11                             ` Michael S. Tsirkin
2015-10-30 16:54                         ` David Woodhouse
2015-10-30 16:54                           ` David Woodhouse
2015-11-03 10:24                         ` Paolo Bonzini
2015-11-03 10:24                         ` Paolo Bonzini
2015-10-28 16:12                   ` Michael S. Tsirkin
2015-10-28  8:36     ` Benjamin Herrenschmidt
2015-10-28  8:36       ` Benjamin Herrenschmidt
2015-10-28 11:23       ` Michael S. Tsirkin
2015-10-28 11:23         ` Michael S. Tsirkin
2015-10-28 13:37         ` David Woodhouse
2015-10-28 13:37           ` David Woodhouse
2015-10-28 14:07           ` Michael S. Tsirkin
2015-10-28 14:07             ` Michael S. Tsirkin
2015-11-19 13:45 ` Michael S. Tsirkin
2015-11-19 13:45 ` Michael S. Tsirkin
2015-11-19 21:59   ` Andy Lutomirski
2015-11-19 21:59     ` Andy Lutomirski
2015-11-19 23:38     ` David Woodhouse
2015-11-19 23:38       ` David Woodhouse
2015-11-20  2:56       ` Benjamin Herrenschmidt
2015-11-20  2:56         ` Benjamin Herrenschmidt
2015-11-20  8:34         ` Michael S. Tsirkin
2015-11-20  8:34         ` Michael S. Tsirkin
2015-11-20  8:21       ` Michael S. Tsirkin
2015-11-20  8:21         ` Michael S. Tsirkin
2015-11-22 15:58         ` David Woodhouse
2015-11-22 15:58         ` David Woodhouse
2015-11-22 21:52           ` Michael S. Tsirkin
2015-11-22 21:52           ` Michael S. Tsirkin
2015-11-22 22:21             ` David Woodhouse
2015-11-22 22:21               ` David Woodhouse
2015-11-23  7:56               ` Michael S. Tsirkin
2015-11-23  7:56                 ` Michael S. Tsirkin
2015-11-22 22:21             ` David Woodhouse
2015-11-22 22:21               ` David Woodhouse
2015-11-20  6:56     ` Michael S. Tsirkin
2015-11-20  6:56       ` Michael S. Tsirkin
2015-11-20  7:47       ` Michael S. Tsirkin
2015-11-20  7:47         ` Michael S. Tsirkin
  -- strict thread matches above, loose matches on Subject: below --
2015-10-28  6:38 Andy Lutomirski

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.