linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/8] Virtio-over-PCIe on non-MIC
@ 2019-01-16 16:32 Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 1/8] vop: Use %z for size_t Vincent Whitchurch
                   ` (10 more replies)
  0 siblings, 11 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
generic framework to use virtio between two Linux systems, given shared memory
and a couple of interrupts.  It does not actually require the Intel MIC
hardware, x86-64, or even PCIe for that matter.  This patch series makes it
buildable on more systems and adds a loopback driver to test it without special
hardware.

Note that I don't have access to Intel MIC hardware so some testing of the
patchset (especially the patch "vop: Use consistent DMA") on that platform
would be appreciated, to ensure that the series does not break anything there.

Vincent Whitchurch (8):
  vop: Use %z for size_t
  vop: Cast pointers to uintptr_t
  vop: Add definition of readq/writeq if missing
  vop: Allow building on more systems
  vop: vringh: Do not crash if no DMA channel
  vop: Fix handling of >32 feature bits
  vop: Use consistent DMA
  vop: Add loopback

 drivers/misc/mic/Kconfig            |  14 +-
 drivers/misc/mic/bus/vop_bus.h      |   2 +
 drivers/misc/mic/host/mic_boot.c    |  46 ++++
 drivers/misc/mic/vop/Makefile       |   2 +
 drivers/misc/mic/vop/vop_loopback.c | 390 ++++++++++++++++++++++++++++
 drivers/misc/mic/vop/vop_main.c     |  36 +--
 drivers/misc/mic/vop/vop_vringh.c   | 151 ++++++-----
 7 files changed, 549 insertions(+), 92 deletions(-)
 create mode 100644 drivers/misc/mic/vop/vop_loopback.c

-- 
2.20.0


^ permalink raw reply	[flat|nested] 29+ messages in thread

* [PATCH 1/8] vop: Use %z for size_t
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 2/8] vop: Cast pointers to uintptr_t Vincent Whitchurch
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

Fixes these kind of errors on 32-bit:

 drivers/misc/mic/vop/vop_vringh.c:590:3:
 error: format '%lx' expects argument of type 'long unsigned int',
 but argument 7 has type 'size_t {aka unsigned int}' [-Werror=format=]

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/vop/vop_vringh.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index cbc8ebcff5cf..4267fa08b483 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -587,7 +587,7 @@ static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
 err:
 	vpdev->hw_ops->iounmap(vpdev, dbuf);
 	dev_dbg(vop_dev(vdev),
-		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
 		__func__, ubuf, dbuf, len, vr_idx);
 	return err;
 }
@@ -670,7 +670,7 @@ static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
 err:
 	vpdev->hw_ops->iounmap(vpdev, dbuf);
 	dev_dbg(vop_dev(vdev),
-		"%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+		"%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
 		__func__, ubuf, dbuf, len, vr_idx);
 	return err;
 }
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 2/8] vop: Cast pointers to uintptr_t
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 1/8] vop: Use %z for size_t Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 17:39   ` Joe Perches
  2019-01-22 10:41   ` Greg KH
  2019-01-16 16:32 ` [PATCH 3/8] vop: Add definition of readq/writeq if missing Vincent Whitchurch
                   ` (8 subsequent siblings)
  10 siblings, 2 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

Fix these on 32-bit:

 vop_vringh.c:711:13: error: cast from pointer to integer of different
 size [-Werror=pointer-to-int-cast]

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/vop/vop_main.c   | 8 ++++----
 drivers/misc/mic/vop/vop_vringh.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index 6b212c8b78e7..5a366cf68d95 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -497,7 +497,7 @@ static int _vop_add_device(struct mic_device_desc __iomem *d,
 	vdev->desc = d;
 	vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
 	vdev->dnode = dnode;
-	vdev->vdev.priv = (void *)(u64)dnode;
+	vdev->vdev.priv = (void *)(uintptr_t)dnode;
 	init_completion(&vdev->reset_done);
 
 	vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev);
@@ -519,7 +519,7 @@ static int _vop_add_device(struct mic_device_desc __iomem *d,
 			offset, type);
 		goto free_irq;
 	}
-	writeq((u64)vdev, &vdev->dc->vdev);
+	writeq((uintptr_t)vdev, &vdev->dc->vdev);
 	dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n",
 		__func__, offset, type, vdev);
 
@@ -552,7 +552,7 @@ static void _vop_handle_config_change(struct mic_device_desc __iomem *d,
 {
 	struct mic_device_ctrl __iomem *dc
 		= (void __iomem *)d + _vop_aligned_desc_size(d);
-	struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+	struct _vop_vdev *vdev = (struct _vop_vdev *)(uintptr_t)readq(&dc->vdev);
 
 	if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
 		return;
@@ -571,7 +571,7 @@ static int _vop_remove_device(struct mic_device_desc __iomem *d,
 {
 	struct mic_device_ctrl __iomem *dc
 		= (void __iomem *)d + _vop_aligned_desc_size(d);
-	struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+	struct _vop_vdev *vdev = (struct _vop_vdev *)(uintptr_t)readq(&dc->vdev);
 	u8 status;
 	int ret = -1;
 
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 4267fa08b483..ccdfbb652123 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -708,12 +708,12 @@ static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
 		partlen = min(kiov->iov_len, len);
 		if (read)
 			ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
-						      (u64)kiov->iov_base,
+						      (uintptr_t)kiov->iov_base,
 						      kiov->iov_len,
 						      vr_idx);
 		else
 			ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
-							(u64)kiov->iov_base,
+							(uintptr_t)kiov->iov_base,
 							kiov->iov_len,
 							vr_idx);
 		if (ret) {
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 3/8] vop: Add definition of readq/writeq if missing
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 1/8] vop: Use %z for size_t Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 2/8] vop: Cast pointers to uintptr_t Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 4/8] vop: Allow building on more systems Vincent Whitchurch
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

Include <linux/io-64-nonatomic-lo-hi.h> so that readq/writeq are
replaced by two readl/writel on systems that do not support them.  The
values read/written are pointers which will be 32-bit on 32-bit systems
so the non-atomicity should not matter.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/vop/vop_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index 5a366cf68d95..26b23f2cf94c 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/dma-mapping.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 
 #include "vop_main.h"
 
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 4/8] vop: Allow building on more systems
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (2 preceding siblings ...)
  2019-01-16 16:32 ` [PATCH 3/8] vop: Add definition of readq/writeq if missing Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 5/8] vop: vringh: Do not crash if no DMA channel Vincent Whitchurch
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

VOP_BUS does not actually depend on 32-bit X86 or PCI.  The code uses
archdata.dma_ops so it can be built on ARM too.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index 227cc7443671..319ee3c90a26 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -38,7 +38,7 @@ comment "VOP Bus Driver"
 
 config VOP_BUS
 	tristate "VOP Bus Driver"
-	depends on 64BIT && PCI && X86 && X86_DEV_DMA_OPS
+	depends on ARM || (X86 && X86_DEV_DMA_OPS)
 	help
 	  This option is selected by any driver which registers a
 	  device or driver on the VOP Bus, such as CONFIG_INTEL_MIC_HOST
@@ -132,7 +132,7 @@ comment "VOP Driver"
 
 config VOP
 	tristate "VOP Driver"
-	depends on 64BIT && PCI && X86 && VOP_BUS
+	depends on VOP_BUS
 	select VHOST_RING
 	select VIRTIO
 	help
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 5/8] vop: vringh: Do not crash if no DMA channel
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (3 preceding siblings ...)
  2019-01-16 16:32 ` [PATCH 4/8] vop: Allow building on more systems Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 6/8] vop: Fix handling of >32 feature bits Vincent Whitchurch
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

Fallback gracefully if no DMA channel is provided instead of
dereferencing NULL pointers.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/vop/vop_vringh.c | 32 +++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index ccdfbb652123..18d6ecff1834 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -531,12 +531,12 @@ static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
 	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
 	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
 	struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
-	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
-	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+	size_t dma_alignment;
+	bool x200;
 	size_t dma_offset, partlen;
 	int err;
 
-	if (!VOP_USE_DMA) {
+	if (!VOP_USE_DMA || !vi->dma_ch) {
 		if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
 			err = -EFAULT;
 			dev_err(vop_dev(vdev), "%s %d err %d\n",
@@ -548,6 +548,9 @@ static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
 		goto err;
 	}
 
+	dma_alignment = 1 << vi->dma_ch->device->copy_align;
+	x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+
 	dma_offset = daddr - round_down(daddr, dma_alignment);
 	daddr -= dma_offset;
 	len += dma_offset;
@@ -606,18 +609,23 @@ static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
 	void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
 	struct vop_vringh *vvr = &vdev->vvr[vr_idx];
 	struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
-	size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
-	bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+	size_t dma_alignment;
+	bool x200;
 	size_t partlen;
-	bool dma = VOP_USE_DMA;
+	bool dma = VOP_USE_DMA && vi->dma_ch;
 	int err = 0;
 
-	if (daddr & (dma_alignment - 1)) {
-		vdev->tx_dst_unaligned += len;
-		dma = false;
-	} else if (ALIGN(len, dma_alignment) > dlen) {
-		vdev->tx_len_unaligned += len;
-		dma = false;
+	if (dma) {
+		dma_alignment = 1 << vi->dma_ch->device->copy_align;
+		x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+
+		if (daddr & (dma_alignment - 1)) {
+			vdev->tx_dst_unaligned += len;
+			dma = false;
+		} else if (ALIGN(len, dma_alignment) > dlen) {
+			vdev->tx_len_unaligned += len;
+			dma = false;
+		}
 	}
 
 	if (!dma)
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 6/8] vop: Fix handling of >32 feature bits
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (4 preceding siblings ...)
  2019-01-16 16:32 ` [PATCH 5/8] vop: vringh: Do not crash if no DMA channel Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 7/8] vop: Use consistent DMA Vincent Whitchurch
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

This is needed, for example, for VIRTIO_F_IOMMU_PLATFORM.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/vop/vop_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index 26b23f2cf94c..dd49169ef53d 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -117,7 +117,7 @@ _vop_total_desc_size(struct mic_device_desc __iomem *desc)
 static u64 vop_get_features(struct virtio_device *vdev)
 {
 	unsigned int i, bits;
-	u32 features = 0;
+	u64 features = 0;
 	struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
 	u8 __iomem *in_features = _vop_vq_features(desc);
 	int feature_len = ioread8(&desc->feature_len);
@@ -125,7 +125,7 @@ static u64 vop_get_features(struct virtio_device *vdev)
 	bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8;
 	for (i = 0; i < bits; i++)
 		if (ioread8(&in_features[i / 8]) & (BIT(i % 8)))
-			features |= BIT(i);
+			features |= BIT_ULL(i);
 
 	return features;
 }
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 7/8] vop: Use consistent DMA
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (5 preceding siblings ...)
  2019-01-16 16:32 ` [PATCH 6/8] vop: Fix handling of >32 feature bits Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 16:32 ` [PATCH 8/8] vop: Add loopback Vincent Whitchurch
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

The vop code maps buffers using the streaming DMA API but never syncs
them so it doesn't work on systems without cache coherence.  The vrings
want consistent mappings, and not streaming mappings so use that API to
allocate and map buffers.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/bus/vop_bus.h    |   2 +
 drivers/misc/mic/host/mic_boot.c  |  46 +++++++++++++
 drivers/misc/mic/vop/vop_main.c   |  23 ++-----
 drivers/misc/mic/vop/vop_vringh.c | 111 ++++++++++++++++--------------
 4 files changed, 114 insertions(+), 68 deletions(-)

diff --git a/drivers/misc/mic/bus/vop_bus.h b/drivers/misc/mic/bus/vop_bus.h
index fff7a865d721..a2db11bea277 100644
--- a/drivers/misc/mic/bus/vop_bus.h
+++ b/drivers/misc/mic/bus/vop_bus.h
@@ -86,6 +86,7 @@ struct vop_driver {
  *                 node to add/remove/configure virtio devices.
  * @get_dp: Get access to the virtio device page used by the self
  *          node to add/remove/configure virtio devices.
+ * @get_dp_dma: Get DMA address of the virtio device page.
  * @send_intr: Send an interrupt to the peer node on a specified doorbell.
  * @ioremap: Map a buffer with the specified DMA address and length.
  * @iounmap: Unmap a buffer previously mapped.
@@ -103,6 +104,7 @@ struct vop_hw_ops {
 	void (*ack_interrupt)(struct vop_device *vpdev, int num);
 	void __iomem * (*get_remote_dp)(struct vop_device *vpdev);
 	void * (*get_dp)(struct vop_device *vpdev);
+	dma_addr_t (*get_dp_dma)(struct vop_device *vpdev);
 	void (*send_intr)(struct vop_device *vpdev, int db);
 	void __iomem * (*ioremap)(struct vop_device *vpdev,
 				  dma_addr_t pa, size_t len);
diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c
index 6479435ac96b..1dcd25917eca 100644
--- a/drivers/misc/mic/host/mic_boot.c
+++ b/drivers/misc/mic/host/mic_boot.c
@@ -55,9 +55,47 @@ static void _mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
 	mic_unmap_single(mdev, dma_addr, size);
 }
 
+static int _mic_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs)
+{
+	return remap_pfn_range(vma, vma->vm_start,
+			       virt_to_pfn(cpu_addr), size,
+			       vma->vm_page_prot);
+}
+
+static void *_mic_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		    gfp_t gfp, unsigned long attrs)
+{
+	void *cpu_addr = (void *) __get_free_pages(gfp, get_order(size));
+	dma_addr_t dma_addr;
+
+	if (!cpu_addr)
+		return NULL;
+
+	*handle = dma_map_single(dev, p, size, attrs);
+	if (dma_mapping_error(dev, *handle)) {
+		free_pages((unsigned long) (uintptr_t) cpu_addr,
+			   get_order(size));
+		return NULL;
+	}
+
+	return cpu_addr;
+}
+
+static void _mic_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		  dma_addr_t handle, unsigned long attrs)
+{
+	dma_unmap_single(dev, cpu_addr, handle, attrs);
+	free_pages((unsigned long) (uintptr_t) cpu_addr, get_order(size));
+}
+
 static const struct dma_map_ops _mic_dma_ops = {
 	.map_page = _mic_dma_map_page,
 	.unmap_page = _mic_dma_unmap_page,
+	.mmap = _mic_dma_mmap,
+	.alloc = _mic_dma_alloc,
+	.free = _mic_dma_free,
 };
 
 static struct mic_irq *
@@ -100,6 +138,13 @@ static void *__mic_get_dp(struct vop_device *vpdev)
 	return mdev->dp;
 }
 
+static void *__mic_get_dp_dma(struct vop_device *vpdev)
+{
+	struct mic_device *mdev = vpdev_to_mdev(&vpdev->dev);
+
+	return mdev->dp_dma_addr;
+}
+
 static void __iomem *__mic_get_remote_dp(struct vop_device *vpdev)
 {
 	return NULL;
@@ -131,6 +176,7 @@ static struct vop_hw_ops vop_hw_ops = {
 	.ack_interrupt = __mic_ack_interrupt,
 	.next_db = __mic_next_db,
 	.get_dp = __mic_get_dp,
+	.get_dp_dma = __mic_get_dp_dma,
 	.get_remote_dp = __mic_get_remote_dp,
 	.send_intr = __mic_send_intr,
 	.ioremap = __mic_ioremap,
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
index dd49169ef53d..6d4a4e8993bb 100644
--- a/drivers/misc/mic/vop/vop_main.c
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -264,9 +264,8 @@ static void vop_del_vq(struct virtqueue *vq, int n)
 	struct vring *vr = (struct vring *)(vq + 1);
 	struct vop_device *vpdev = vdev->vpdev;
 
-	dma_unmap_single(&vpdev->dev, vdev->used[n],
-			 vdev->used_size[n], DMA_BIDIRECTIONAL);
-	free_pages((unsigned long)vr->used, get_order(vdev->used_size[n]));
+	dma_free_wc(&vpdev->dev, vdev->used_size[n], vr->used, vdev->used[n]);
+
 	vring_del_virtqueue(vq);
 	vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]);
 	vdev->vr[n] = NULL;
@@ -346,23 +345,16 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 	vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
 					     sizeof(struct vring_used_elem) *
 					     le16_to_cpu(config.num));
-	used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					get_order(vdev->used_size[index]));
+	used = dma_alloc_wc(&vpdev->dev, vdev->used_size[index],
+			    &vdev->used[index], GFP_KERNEL | __GFP_ZERO);
+
 	if (!used) {
 		err = -ENOMEM;
 		dev_err(_vop_dev(vdev), "%s %d err %d\n",
 			__func__, __LINE__, err);
 		goto del_vq;
 	}
-	vdev->used[index] = dma_map_single(&vpdev->dev, used,
-					    vdev->used_size[index],
-					    DMA_BIDIRECTIONAL);
-	if (dma_mapping_error(&vpdev->dev, vdev->used[index])) {
-		err = -ENOMEM;
-		dev_err(_vop_dev(vdev), "%s %d err %d\n",
-			__func__, __LINE__, err);
-		goto free_used;
-	}
+
 	writeq(vdev->used[index], &vqconfig->used_address);
 	/*
 	 * To reassign the used ring here we are directly accessing
@@ -376,9 +368,6 @@ static struct virtqueue *vop_find_vq(struct virtio_device *dev,
 
 	vq->priv = vdev;
 	return vq;
-free_used:
-	free_pages((unsigned long)used,
-		   get_order(vdev->used_size[index]));
 del_vq:
 	vring_del_virtqueue(vq);
 unmap:
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
index 18d6ecff1834..4b8ab0d319b0 100644
--- a/drivers/misc/mic/vop/vop_vringh.c
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -310,9 +310,8 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 		mutex_init(&vvr->vr_mutex);
 		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
 			sizeof(struct _mic_vring_info));
-		vr->va = (void *)
-			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-					 get_order(vr_size));
+		vr->va = dma_alloc_wc(&vpdev->dev, vr_size, &vr_addr,
+				      GFP_KERNEL | __GFP_ZERO);
 		if (!vr->va) {
 			ret = -ENOMEM;
 			dev_err(vop_dev(vdev), "%s %d err %d\n",
@@ -322,15 +321,6 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 		vr->len = vr_size;
 		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
 		vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
-		vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
-					 DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(&vpdev->dev, vr_addr)) {
-			free_pages((unsigned long)vr->va, get_order(vr_size));
-			ret = -ENOMEM;
-			dev_err(vop_dev(vdev), "%s %d err %d\n",
-				__func__, __LINE__, ret);
-			goto err;
-		}
 		vqconfig[i].address = cpu_to_le64(vr_addr);
 
 		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
@@ -394,10 +384,8 @@ static int vop_virtio_add_device(struct vop_vdev *vdev,
 	for (j = 0; j < i; j++) {
 		struct vop_vringh *vvr = &vdev->vvr[j];
 
-		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
-				 vvr->vring.len, DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->vring.va,
-			   get_order(vvr->vring.len));
+		dma_free_wc(&vpdev->dev, vvr->vring.len, vvr->vring.va,
+			    le64_to_cpu(vqconfig[j].address));
 	}
 	return ret;
 }
@@ -452,10 +440,8 @@ static void vop_virtio_del_device(struct vop_vdev *vdev)
 			   get_order(VOP_INT_DMA_BUF_SIZE));
 		vringh_kiov_cleanup(&vvr->riov);
 		vringh_kiov_cleanup(&vvr->wiov);
-		dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
-				 vvr->vring.len, DMA_BIDIRECTIONAL);
-		free_pages((unsigned long)vvr->vring.va,
-			   get_order(vvr->vring.len));
+		dma_free_wc(&vpdev->dev, vvr->vring.len, vvr->vring.va,
+			    le64_to_cpu(vqconfig[i].address));
 	}
 	/*
 	 * Order the type update with previous stores. This write barrier
@@ -1046,49 +1032,44 @@ static __poll_t vop_poll(struct file *f, poll_table *wait)
 }
 
 static inline int
-vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
-		 unsigned long *size, unsigned long *pa)
+vop_query_offset(struct vop_vdev *vdev, unsigned long offset)
 {
-	struct vop_device *vpdev = vdev->vpdev;
 	unsigned long start = MIC_DP_SIZE;
 	int i;
 
-	/*
-	 * MMAP interface is as follows:
-	 * offset				region
-	 * 0x0					virtio device_page
-	 * 0x1000				first vring
-	 * 0x1000 + size of 1st vring		second vring
-	 * ....
-	 */
-	if (!offset) {
-		*pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
-		*size = MIC_DP_SIZE;
-		return 0;
-	}
-
 	for (i = 0; i < vdev->dd->num_vq; i++) {
 		struct vop_vringh *vvr = &vdev->vvr[i];
 
-		if (offset == start) {
-			*pa = virt_to_phys(vvr->vring.va);
-			*size = vvr->vring.len;
-			return 0;
-		}
+		if (offset == start)
+			return i;
+
 		start += vvr->vring.len;
 	}
+
 	return -1;
 }
 
 /*
  * Maps the device page and virtio rings to user space for readonly access.
+ *
+ * MMAP interface is as follows:
+ * offset				region
+ * 0x0					virtio device_page
+ * 0x1000				first vring
+ * 0x1000 + size of 1st vring		second vring
+ * ....
  */
 static int vop_mmap(struct file *f, struct vm_area_struct *vma)
 {
 	struct vop_vdev *vdev = f->private_data;
+	struct vop_device *vpdev = vdev->vpdev;
+	struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
 	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
-	int i, err;
+	unsigned long size_rem = vma->vm_end - vma->vm_start;
+	unsigned long saved_pgoff = vma->vm_pgoff;
+	unsigned long saved_start = vma->vm_start;
+	unsigned long saved_end = vma->vm_end;
+	int err;
 
 	err = vop_vdev_inited(vdev);
 	if (err)
@@ -1098,16 +1079,44 @@ static int vop_mmap(struct file *f, struct vm_area_struct *vma)
 		goto ret;
 	}
 	while (size_rem) {
-		i = vop_query_offset(vdev, offset, &size, &pa);
-		if (i < 0) {
-			err = -EINVAL;
-			goto ret;
+		unsigned long size;
+		dma_addr_t dma_addr;
+		void *cpu_addr;
+
+		if (!offset) {
+			cpu_addr = vpdev->hw_ops->get_dp(vpdev);
+			dma_addr = vpdev->hw_ops->get_dp_dma(vpdev);
+			size = MIC_DP_SIZE;
+		} else {
+			struct vop_vringh *vvr;
+			int i;
+
+			i = vop_query_offset(vdev, offset);
+			if (i < 0) {
+				err = -EINVAL;
+				goto ret;
+			}
+
+			vvr = &vdev->vvr[i];
+			cpu_addr = vvr->vring.va;
+			dma_addr = vqconfig[i].address;
+			size = vvr->vring.len;
 		}
-		err = remap_pfn_range(vma, vma->vm_start + offset,
-				      pa >> PAGE_SHIFT, size,
-				      vma->vm_page_prot);
+
+		/* dma_mmap() wants to think it's mapping the entire vma */
+		vma->vm_start += offset;
+		vma->vm_end = vma->vm_start + PAGE_ALIGN(size);
+		vma->vm_pgoff = 0;
+
+		err = dma_mmap_wc(&vpdev->dev, vma, cpu_addr, dma_addr, size);
+
+		vma->vm_start = saved_start;
+		vma->vm_end = saved_end;
+		vma->vm_pgoff = saved_pgoff;
+
 		if (err)
 			goto ret;
+
 		size_rem -= size;
 		offset += size;
 	}
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* [PATCH 8/8] vop: Add loopback
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (6 preceding siblings ...)
  2019-01-16 16:32 ` [PATCH 7/8] vop: Use consistent DMA Vincent Whitchurch
@ 2019-01-16 16:32 ` Vincent Whitchurch
  2019-01-16 17:07 ` [PATCH 0/8] Virtio-over-PCIe on non-MIC Arnd Bergmann
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-16 16:32 UTC (permalink / raw)
  To: sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

Add a loopback driver to allow testing and evaluation of the VOP
framework without special hardware.  The host and the guest will run
under the same kernel.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
---
 drivers/misc/mic/Kconfig            |  10 +
 drivers/misc/mic/vop/Makefile       |   2 +
 drivers/misc/mic/vop/vop_loopback.c | 390 ++++++++++++++++++++++++++++
 3 files changed, 402 insertions(+)
 create mode 100644 drivers/misc/mic/vop/vop_loopback.c

diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index 319ee3c90a26..b181cf20d40f 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -149,6 +149,16 @@ config VOP
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
+config VOP_LOOPBACK
+	tristate "VOP loopback driver"
+	depends on VOP
+	help
+	  This enables a loopback driver to test and evaluate the VOP
+	  infrastructure without actual PCIe hardware.  The host and the guest
+	  sides run under the same kernel.
+
+	  If unsure, say N.
+
 if VOP
 source "drivers/vhost/Kconfig.vringh"
 endif
diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile
index 78819c8999f1..a6ead25c4418 100644
--- a/drivers/misc/mic/vop/Makefile
+++ b/drivers/misc/mic/vop/Makefile
@@ -7,3 +7,5 @@ obj-m := vop.o
 vop-objs += vop_main.o
 vop-objs += vop_debugfs.o
 vop-objs += vop_vringh.o
+
+obj-$(CONFIG_VOP_LOOPBACK) += vop_loopback.o
diff --git a/drivers/misc/mic/vop/vop_loopback.c b/drivers/misc/mic/vop/vop_loopback.c
new file mode 100644
index 000000000000..6e3d24f166cf
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_loopback.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Axis Communications AB
+ */
+
+#include <linux/platform_device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+#include <linux/mic_common.h>
+#include <linux/of_device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/io.h>
+
+#include "../bus/vop_bus.h"
+
+struct mic_irq {
+	struct list_head list;
+	int irq;
+	irqreturn_t (*func)(int irq, void *data);
+	void *data;
+};
+
+struct vop_loopback_end {
+	struct vop_loopback *loopback;
+	const char *name;
+	struct vop_device *vop;
+	struct list_head irqs;
+	struct mutex mutex;
+	struct work_struct work;
+};
+
+struct vop_loopback {
+	struct device *dev;
+	void *dp;
+	struct vop_loopback_end host;
+	struct vop_loopback_end guest;
+};
+
+static inline struct vop_loopback *vop_to_loopback(struct device *dev)
+{
+	return dev_get_drvdata(dev->parent);
+}
+
+static dma_addr_t
+vop_loopback_dma_map_page(struct device *dev, struct page *page,
+		  unsigned long offset, size_t size,
+		  enum dma_data_direction dir, unsigned long attrs)
+{
+	return page_to_phys(page) + offset;
+}
+
+static void vop_loopback_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				size_t size, enum dma_data_direction dir,
+				unsigned long attrs)
+{
+}
+
+static int vop_loopback_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs)
+{
+	return remap_pfn_range(vma, vma->vm_start,
+			       PHYS_PFN(dma_addr), size,
+			       vma->vm_page_prot);
+}
+
+static void *vop_loopback_dma_alloc(struct device *dev, size_t size,
+				    dma_addr_t *handle, gfp_t gfp,
+				    unsigned long attrs)
+{
+	void *p = (void *) __get_free_pages(gfp, get_order(size));
+
+	if (p)
+		*handle = virt_to_phys(p);
+
+	return p;
+}
+
+static void vop_loopback_dma_free(struct device *dev, size_t size,
+				  void *cpu_addr, dma_addr_t handle,
+				  unsigned long attrs)
+{
+	free_pages((unsigned long) (uintptr_t) cpu_addr, get_order(size));
+}
+
+static const struct dma_map_ops vop_loopback_dma_ops = {
+	.map_page = vop_loopback_dma_map_page,
+	.unmap_page = vop_loopback_dma_unmap_page,
+	.mmap = vop_loopback_dma_mmap,
+	.alloc = vop_loopback_dma_alloc,
+	.free = vop_loopback_dma_free,
+};
+
+
+static void vop_loopback_ack_interrupt(struct vop_device *vop, int num)
+{
+}
+
+static int vop_loopback_next_db(struct vop_device *vop)
+{
+	return 0;
+}
+
+static void *vop_loopback_get_dp(struct vop_device *vop)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	return loopback->dp;
+}
+
+static dma_addr_t vop_loopback_get_dp_dma(struct vop_device *vop)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	return virt_to_phys(loopback->dp);
+}
+
+static void __iomem *vop_loopback_get_remote_dp(struct vop_device *vop)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	return (void __iomem *) loopback->dp;
+}
+
+static struct mic_irq *
+vop_loopback_request_irq(struct vop_loopback_end *end,
+		  irqreturn_t (*func)(int irq, void *data),
+		  void *data, int intr_src)
+{
+	struct mic_irq *mic_irq;
+
+	mic_irq = kzalloc(sizeof(*mic_irq), GFP_KERNEL);
+	if (!mic_irq)
+		return ERR_PTR(-ENOMEM);
+
+	mic_irq->irq = intr_src;
+	mic_irq->func = func;
+	mic_irq->data = data;
+
+	mutex_lock(&end->mutex);
+	list_add(&mic_irq->list, &end->irqs);
+	mutex_unlock(&end->mutex);
+
+	return mic_irq;
+}
+
+static struct mic_irq *
+vop_loopback_request_irq_host(struct vop_device *vop,
+		       irqreturn_t (*func)(int irq, void *data),
+		       const char *name, void *data, int intr_src)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	return vop_loopback_request_irq(&loopback->host, func, data, intr_src);
+}
+
+static struct mic_irq *
+vop_loopback_request_irq_guest(struct vop_device *vop,
+		       irqreturn_t (*func)(int irq, void *data),
+		       const char *name, void *data, int intr_src)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	return vop_loopback_request_irq(&loopback->guest, func, data, intr_src);
+}
+
+static void vop_loopback_free_irq(struct vop_loopback_end *end,
+			   struct mic_irq *cookie)
+{
+	mutex_lock(&end->mutex);
+	list_del(&cookie->list);
+	mutex_unlock(&end->mutex);
+
+	kfree(cookie);
+}
+
+static void vop_loopback_free_irq_host(struct vop_device *vop,
+				struct mic_irq *cookie, void *data)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	vop_loopback_free_irq(&loopback->host, cookie);
+}
+
+static void vop_loopback_free_irq_guest(struct vop_device *vop,
+				struct mic_irq *cookie, void *data)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	vop_loopback_free_irq(&loopback->guest, cookie);
+}
+
+static void vop_loopback_send_intr_host(struct vop_device *vop, int db)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	schedule_work(&loopback->guest.work);
+}
+
+static void vop_loopback_send_intr_guest(struct vop_device *vop, int db)
+{
+	struct vop_loopback *loopback = vop_to_loopback(&vop->dev);
+
+	schedule_work(&loopback->host.work);
+}
+
+static void __iomem *vop_loopback_ioremap(struct vop_device *vop,
+				   dma_addr_t pa, size_t len)
+{
+	return (void __iomem *) memremap(pa, len, MEMREMAP_WB);
+}
+
+static void vop_loopback_iounmap(struct vop_device *vop, void __iomem *va)
+{
+	memunmap((void __force *) va);
+}
+
+static struct vop_hw_ops vop_loopback_host_ops = {
+	.request_irq = vop_loopback_request_irq_host,
+	.free_irq = vop_loopback_free_irq_host,
+	.ack_interrupt = vop_loopback_ack_interrupt,
+	.next_db = vop_loopback_next_db,
+	.get_dp = vop_loopback_get_dp,
+	.get_dp_dma = vop_loopback_get_dp_dma,
+	.get_remote_dp = vop_loopback_get_remote_dp,
+	.send_intr = vop_loopback_send_intr_host,
+	.ioremap = vop_loopback_ioremap,
+	.iounmap = vop_loopback_iounmap,
+};
+
+static struct vop_hw_ops vop_loopback_guest_ops = {
+	.request_irq = vop_loopback_request_irq_guest,
+	.free_irq = vop_loopback_free_irq_guest,
+	.ack_interrupt = vop_loopback_ack_interrupt,
+	.next_db = vop_loopback_next_db,
+	.get_dp = vop_loopback_get_dp,
+	.get_remote_dp = vop_loopback_get_remote_dp,
+	.send_intr = vop_loopback_send_intr_guest,
+	.ioremap = vop_loopback_ioremap,
+	.iounmap = vop_loopback_iounmap,
+};
+
+static void vop_loopback_irq(struct work_struct *work)
+{
+	struct vop_loopback_end *end = container_of(work, struct vop_loopback_end, work);
+	struct vop_loopback *loopback = end->loopback;
+	struct mic_irq *mic_irq;
+
+	dev_dbg(loopback->dev, "%s irq\n", end->name);
+
+	mutex_lock(&end->mutex);
+	list_for_each_entry(mic_irq, &end->irqs, list) {
+		irqreturn_t ret;
+
+		dev_dbg(loopback->dev, "calling %pS\n", mic_irq->func);
+		ret = mic_irq->func(mic_irq->irq, mic_irq->data);
+		dev_dbg(loopback->dev, "%pS ret %d\n", mic_irq->func, ret);
+	}
+	mutex_unlock(&end->mutex);
+}
+
+static void vop_loopback_bootparam_init(struct vop_loopback *loopback)
+{
+	struct mic_bootparam *bootparam = loopback->dp;
+
+	bootparam->magic = cpu_to_le32(MIC_MAGIC);
+	bootparam->h2c_config_db = -1;
+	bootparam->node_id = 1;
+	bootparam->scif_host_dma_addr = 0x0;
+	bootparam->scif_card_dma_addr = 0x0;
+	bootparam->c2h_scif_db = -1;
+	bootparam->h2c_scif_db = -1;
+}
+
+static void vop_loopback_end_init(struct vop_loopback *loopback,
+				  struct vop_loopback_end *end,
+				  const char *name)
+{
+	end->loopback = loopback;
+	end->name = name;
+
+	INIT_WORK(&end->work, vop_loopback_irq);
+
+	INIT_LIST_HEAD(&end->irqs);
+	mutex_init(&end->mutex);
+}
+
+static int vop_loopback_probe(struct platform_device *pdev)
+{
+	struct vop_loopback *loopback;
+	int ret;
+
+	loopback = devm_kzalloc(&pdev->dev, sizeof(*loopback), GFP_KERNEL);
+	if (!loopback)
+		return -ENOMEM;
+
+	loopback->dp = (void *) devm_get_free_pages(&pdev->dev,
+				GFP_KERNEL | __GFP_ZERO,
+				get_order(MIC_DP_SIZE));
+	if (!loopback->dp)
+		return -ENOMEM;
+
+	loopback->dev = &pdev->dev;
+
+	vop_loopback_end_init(loopback, &loopback->host, "host");
+	vop_loopback_end_init(loopback, &loopback->guest, "guest");
+	vop_loopback_bootparam_init(loopback);
+
+	platform_set_drvdata(pdev, loopback);
+
+	loopback->host.vop = vop_register_device(&pdev->dev, VOP_DEV_TRNSP,
+						 &vop_loopback_dma_ops,
+						 &vop_loopback_host_ops, 1,
+						 NULL, NULL);
+	if (IS_ERR(loopback->host.vop))
+		return PTR_ERR(loopback->host.vop);
+
+	loopback->guest.vop = vop_register_device(&pdev->dev, VOP_DEV_TRNSP,
+						  &vop_loopback_dma_ops,
+						  &vop_loopback_guest_ops,
+						  0, NULL, NULL);
+	if (IS_ERR(loopback->guest.vop)) {
+		ret = PTR_ERR(loopback->guest.vop);
+		goto err_unregister_host;
+	}
+
+	schedule_work(&loopback->guest.work);
+
+	return 0;
+
+err_unregister_host:
+	vop_unregister_device(loopback->host.vop);
+	return ret;
+}
+
+static int vop_loopback_remove(struct platform_device *pdev)
+{
+	struct vop_loopback *loopback = platform_get_drvdata(pdev);
+
+	vop_unregister_device(loopback->guest.vop);
+	vop_unregister_device(loopback->host.vop);
+
+	return 0;
+}
+
+static struct platform_driver vop_loopback = {
+	.probe = vop_loopback_probe,
+	.remove = vop_loopback_remove,
+	.driver = {
+		.name = "vop-loopback",
+	},
+};
+
+static struct platform_device *loopback_dev;
+
+static int __init vop_loopback_init(void)
+{
+	int ret;
+
+	loopback_dev = platform_device_register_simple("vop-loopback", 0,
+						       NULL, 0);
+	if (IS_ERR(loopback_dev))
+		return PTR_ERR(loopback_dev);
+
+	ret = platform_driver_register(&vop_loopback);
+	if (ret)
+		goto err_remove_dev;
+
+	return 0;
+
+err_remove_dev:
+	platform_device_unregister(loopback_dev);
+	return ret;
+}
+
+static void __exit vop_loopback_exit(void)
+{
+	platform_driver_unregister(&vop_loopback);
+	platform_device_unregister(loopback_dev);
+}
+
+module_init(vop_loopback_init);
+module_exit(vop_loopback_exit);
+
+MODULE_LICENSE("GPL v2");
-- 
2.20.0


^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (7 preceding siblings ...)
  2019-01-16 16:32 ` [PATCH 8/8] vop: Add loopback Vincent Whitchurch
@ 2019-01-16 17:07 ` Arnd Bergmann
  2019-01-17 10:54   ` Vincent Whitchurch
  2019-01-18 23:49 ` Stephen Warren
  2019-01-22 10:45 ` Greg KH
  10 siblings, 1 reply; 29+ messages in thread
From: Arnd Bergmann @ 2019-01-16 17:07 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Vincent Whitchurch, Kishon Vijay Abraham I, Lorenzo Pieralisi,
	linux-pci, linux-ntb, Jon Mason, Dave Jiang, Allen Hubbe

On Wed, Jan 16, 2019 at 5:33 PM Vincent Whitchurch
<vincent.whitchurch@axis.com> wrote:
>
> The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
> generic framework to use virtio between two Linux systems, given shared memory
> and a couple of interrupts.  It does not actually require the Intel MIC
> hardware, x86-64, or even PCIe for that matter.  This patch series makes it
> buildable on more systems and adds a loopback driver to test it without special
> hardware.
>
> Note that I don't have access to Intel MIC hardware so some testing of the
> patchset (especially the patch "vop: Use consistent DMA") on that platform
> would be appreciated, to ensure that the series does not break anything there.

Hi Vincent,

First of all, I think it is a very good idea to make virtio over PCIe avaialable
more generally. Your patches also make sense here, they mostly fix
portability bugs, so no objection there.

I think we need to take a step back though and discuss what combinations
we actually do want to support. I have not actually read the whole mic/vop
driver, so I don't know if this would be a good fit as a generic interface --
it may or may not be, and any other input would be helpful.

Aside from that, I should note that we have two related subsystems
in the kernel: the PCIe endpoint subsystem maintained by Kishon and
Lorenzo, and the NTB subsystem maintained by Jon, Dave and Allen.

In order to properly support virtio over PCIe, I would hope we can come
up with a user space interface that looks the same way for configuring
virtio drivers in mic, pcie-endpoint and ntb, if at all possible. Have
you looked at those two subsystems?

        Arnd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/8] vop: Cast pointers to uintptr_t
  2019-01-16 16:32 ` [PATCH 2/8] vop: Cast pointers to uintptr_t Vincent Whitchurch
@ 2019-01-16 17:39   ` Joe Perches
  2019-01-22 10:41   ` Greg KH
  1 sibling, 0 replies; 29+ messages in thread
From: Joe Perches @ 2019-01-16 17:39 UTC (permalink / raw)
  To: Vincent Whitchurch, sudeep.dutt, ashutosh.dixit, gregkh, arnd
  Cc: linux-kernel, Vincent Whitchurch

On Wed, 2019-01-16 at 17:32 +0100, Vincent Whitchurch wrote:
> Fix these on 32-bit:
> 
>  vop_vringh.c:711:13: error: cast from pointer to integer of different
>  size [-Werror=pointer-to-int-cast]
[]
> diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
[]
> @@ -497,7 +497,7 @@ static int _vop_add_device(struct mic_device_desc __iomem *d,
>  	vdev->desc = d;
>  	vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
>  	vdev->dnode = dnode;
> -	vdev->vdev.priv = (void *)(u64)dnode;
> +	vdev->vdev.priv = (void *)(uintptr_t)dnode;

trivia:

unsigned long is more commonly used in the kernel for
these casts.



^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-16 17:07 ` [PATCH 0/8] Virtio-over-PCIe on non-MIC Arnd Bergmann
@ 2019-01-17 10:54   ` Vincent Whitchurch
  2019-01-17 12:39     ` Arnd Bergmann
  0 siblings, 1 reply; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-17 10:54 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe

On Wed, Jan 16, 2019 at 06:07:53PM +0100, Arnd Bergmann wrote:
> On Wed, Jan 16, 2019 at 5:33 PM Vincent Whitchurch <vincent.whitchurch@axis.com> wrote:
> > The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
> > generic framework to use virtio between two Linux systems, given shared memory
> > and a couple of interrupts.  It does not actually require the Intel MIC
> > hardware, x86-64, or even PCIe for that matter.  This patch series makes it
> > buildable on more systems and adds a loopback driver to test it without special
> > hardware.
> >
> > Note that I don't have access to Intel MIC hardware so some testing of the
> > patchset (especially the patch "vop: Use consistent DMA") on that platform
> > would be appreciated, to ensure that the series does not break anything there.
> 
> I think we need to take a step back though and discuss what combinations
> we actually do want to support. I have not actually read the whole mic/vop
> driver, so I don't know if this would be a good fit as a generic interface --
> it may or may not be, and any other input would be helpful.

The MIC driver as a whole is uninteresting as a generic interface since
it is quite tied to the Intel hardware.  The VOP parts though are
logically separated and have no relation to that hardware, even if the
ioctls are called MIC_VIRTIO_*.

The samples/mic/mpssd/mpssd.c code handles both the boot of the MIC
(sysfs) and the VOP parts (ioctls).

> Aside from that, I should note that we have two related subsystems
> in the kernel: the PCIe endpoint subsystem maintained by Kishon and
> Lorenzo, and the NTB subsystem maintained by Jon, Dave and Allen.
> 
> In order to properly support virtio over PCIe, I would hope we can come
> up with a user space interface that looks the same way for configuring
> virtio drivers in mic, pcie-endpoint and ntb, if at all possible. Have
> you looked at those two subsystems?

pcie-endpoint is a generic framework that allows Linux to act as an
endpoint and set up the BARs, etc.  mic appears to have Intel
MIC-specific code for this (pre-dating pcie-endpoint) but this is
separate from the vop code.  pcie-endpoint and vop do not have
overlapping functionality and can be used together.

I'm not familiar with NTB, but from a quick look it seems to be tied to
special hardware, and I don't see any virtio-related code there.  A vop
backend for NTB-backend would presumably work to allow virtio
functionality there.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 10:54   ` Vincent Whitchurch
@ 2019-01-17 12:39     ` Arnd Bergmann
  2019-01-17 15:15       ` Christoph Hellwig
  2019-01-17 15:19       ` Vincent Whitchurch
  0 siblings, 2 replies; 29+ messages in thread
From: Arnd Bergmann @ 2019-01-17 12:39 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe

On Thu, Jan 17, 2019 at 11:54 AM Vincent Whitchurch
<vincent.whitchurch@axis.com> wrote:
>
> On Wed, Jan 16, 2019 at 06:07:53PM +0100, Arnd Bergmann wrote:
> > On Wed, Jan 16, 2019 at 5:33 PM Vincent Whitchurch <vincent.whitchurch@axis.com> wrote:
> > > The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
> > > generic framework to use virtio between two Linux systems, given shared memory
> > > and a couple of interrupts.  It does not actually require the Intel MIC
> > > hardware, x86-64, or even PCIe for that matter.  This patch series makes it
> > > buildable on more systems and adds a loopback driver to test it without special
> > > hardware.
> > >
> > > Note that I don't have access to Intel MIC hardware so some testing of the
> > > patchset (especially the patch "vop: Use consistent DMA") on that platform
> > > would be appreciated, to ensure that the series does not break anything there.
> >
> > I think we need to take a step back though and discuss what combinations
> > we actually do want to support. I have not actually read the whole mic/vop
> > driver, so I don't know if this would be a good fit as a generic interface --
> > it may or may not be, and any other input would be helpful.
>
> The MIC driver as a whole is uninteresting as a generic interface since
> it is quite tied to the Intel hardware.  The VOP parts though are
> logically separated and have no relation to that hardware, even if the
> ioctls are called MIC_VIRTIO_*.
>
> The samples/mic/mpssd/mpssd.c code handles both the boot of the MIC
> (sysfs) and the VOP parts (ioctls).

Right, I wasn't talking about the MIC driver here, just the VOP
stuff. Since that comes with an ioctl interface that you want to keep
using on other hardware, this still means we have to review if it is
a good fit as a general-purpose API.

> > Aside from that, I should note that we have two related subsystems
> > in the kernel: the PCIe endpoint subsystem maintained by Kishon and
> > Lorenzo, and the NTB subsystem maintained by Jon, Dave and Allen.
> >
> > In order to properly support virtio over PCIe, I would hope we can come
> > up with a user space interface that looks the same way for configuring
> > virtio drivers in mic, pcie-endpoint and ntb, if at all possible. Have
> > you looked at those two subsystems?
>
> pcie-endpoint is a generic framework that allows Linux to act as an
> endpoint and set up the BARs, etc.  mic appears to have Intel
> MIC-specific code for this (pre-dating pcie-endpoint) but this is
> separate from the vop code.  pcie-endpoint and vop do not have
> overlapping functionality and can be used together.

What we need to find out though is whether the combination of vop
with pcie-endpoint provides a good abstraction for what users
actually need when want to use e.g. a virtio-net connection on top
of PCIe endpoint hardware.

> I'm not familiar with NTB, but from a quick look it seems to be tied to
> special hardware, and I don't see any virtio-related code there.  A vop
> backend for NTB-backend would presumably work to allow virtio
> functionality there.

Correct, and again we have to see if this is a good interface. The NTB
and PCIe-endpoint interfaces have a number of differences and a
number of similarities. In particular they should both be usable with
virtio-style drivers, but the underlying hardware differs mainly in how
it is probed by the system: an NTB is seen as a PCI device attached
to two host bridges, while and endpoint is typically a platform_device
on one side, but a pci_dev on the other side.

Can you describe how you expect a VOP device over NTB or
PCIe-endpoint would get created, configured and used?
Is there always one master side that is responsible for creating
virtio devices on it, with the slave side automatically attaching to
them, or can either side create virtio devices? Is there any limit on
the number of virtio devices or queues within a VOP device?

       Arnd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 12:39     ` Arnd Bergmann
@ 2019-01-17 15:15       ` Christoph Hellwig
  2019-01-17 15:19         ` Christoph Hellwig
  2019-01-17 15:19       ` Vincent Whitchurch
  1 sibling, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2019-01-17 15:15 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Vincent Whitchurch, sudeep.dutt, ashutosh.dixit, gregkh,
	Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 01:39:27PM +0100, Arnd Bergmann wrote:
> Can you describe how you expect a VOP device over NTB or
> PCIe-endpoint would get created, configured and used?
> Is there always one master side that is responsible for creating
> virtio devices on it, with the slave side automatically attaching to
> them, or can either side create virtio devices? Is there any limit on
> the number of virtio devices or queues within a VOP device?

For VOP device over NTB your configure your device using configfs
on one side, and for the other side it will just show up like any
other PCIe device, because it is.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 12:39     ` Arnd Bergmann
  2019-01-17 15:15       ` Christoph Hellwig
@ 2019-01-17 15:19       ` Vincent Whitchurch
  2019-01-17 15:21         ` Christoph Hellwig
                           ` (2 more replies)
  1 sibling, 3 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-17 15:19 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe

On Thu, Jan 17, 2019 at 01:39:27PM +0100, Arnd Bergmann wrote:
> Correct, and again we have to see if this is a good interface. The NTB
> and PCIe-endpoint interfaces have a number of differences and a
> number of similarities. In particular they should both be usable with
> virtio-style drivers, but the underlying hardware differs mainly in how
> it is probed by the system: an NTB is seen as a PCI device attached
> to two host bridges, while and endpoint is typically a platform_device
> on one side, but a pci_dev on the other side.
> 
> Can you describe how you expect a VOP device over NTB or
> PCIe-endpoint would get created, configured and used?

Assuming PCIe-endpoint:

On the RC, a vop-host-backend driver (PCI driver) sets up some shared
memory area which the RC and the endpoint can use to communicate the
location of the MIC device descriptors and other information such as the
MSI address.  It implements vop callbacks to allow the vop framework to
obtain the address of the MIC descriptors and send/receive interrupts
to/from the guest.

On the endpoint, the PCIe endpoint driver sets up (hardcoded) BARs and
memory regions as required to allow the endpoint and the root complex to
access each other's memory.

On the endpoint, the vop-guest-backend, via the shared memory set up by
the vop-host-backend, obtains the address of the MIC device page and the
MSI address, and a method to receive vop interrupts from the host.  This
information is used to implement the vop callbacks allowing the vop
framework to access to the MIC device page and send/receive interrupts
from/to the host.

vop (despite its name) doesn't care about PCIe.  The vop-guest-backend
doesn't actually need to talk to the PCIe endpoint driver.  The
vop-guest-backend can be probed via any means, such as via a device tree
on the endpoint.

On the RC, userspace opens the vop device and adds the virtio devices,
which end up in the MIC device page set up by the vop-host-backend.

On the endpoint, when the vop framework (via the vop-guest-backend) sees
these devices, it registers devices on the virtio bus and the virtio
drivers are probed.

On the RC, userspace implements the device end of the virtio
communication in userspace, using the MIC_VIRTIO_COPY_DESC ioctl.  I
also have patches to support vhost.

> Is there always one master side that is responsible for creating
> virtio devices on it, with the slave side automatically attaching to
> them, or can either side create virtio devices?

Only the master can create virtio devices.  The virtio drivers run on
the slave.

> Is there any limit on
> the number of virtio devices or queues within a VOP device?

The virtio device information (mic_device_desc) is put into the MIC
device page whose size is limited by the ABI header in
include/uapi/linux/mic_ioctl.h (MIC_DP_SIZE, 4096 bytes).  So the number
of devices is limited by the limit of the number of device descriptors
that can fit in that size.  There is also a per-device limit on the
number of vrings (MIC_VRING_ENTRIES) and vring entries
(MIC_VRING_ENTRIES) in the ABI header.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:15       ` Christoph Hellwig
@ 2019-01-17 15:19         ` Christoph Hellwig
  2019-01-17 15:31           ` Arnd Bergmann
  0 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2019-01-17 15:19 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Vincent Whitchurch, sudeep.dutt, ashutosh.dixit, gregkh,
	Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 07:15:29AM -0800, Christoph Hellwig wrote:
> On Thu, Jan 17, 2019 at 01:39:27PM +0100, Arnd Bergmann wrote:
> > Can you describe how you expect a VOP device over NTB or
> > PCIe-endpoint would get created, configured and used?
> > Is there always one master side that is responsible for creating
> > virtio devices on it, with the slave side automatically attaching to
> > them, or can either side create virtio devices? Is there any limit on
> > the number of virtio devices or queues within a VOP device?
> 
> For VOP device over NTB your configure your device using configfs
> on one side, and for the other side it will just show up like any
> other PCIe device, because it is.

Sorry, I mean over the PCI-EP infratructure of course.  NTB actually
is rather hairy and complicated.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:19       ` Vincent Whitchurch
@ 2019-01-17 15:21         ` Christoph Hellwig
  2019-01-17 15:32           ` Vincent Whitchurch
  2019-01-17 15:53         ` Arnd Bergmann
  2019-01-17 22:17         ` Logan Gunthorpe
  2 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2019-01-17 15:21 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: Arnd Bergmann, sudeep.dutt, ashutosh.dixit, gregkh,
	Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 04:19:06PM +0100, Vincent Whitchurch wrote:
> On the RC, a vop-host-backend driver (PCI driver) sets up some shared
> memory area which the RC and the endpoint can use to communicate the
> location of the MIC device descriptors and other information such as the
> MSI address.  It implements vop callbacks to allow the vop framework to
> obtain the address of the MIC descriptors and send/receive interrupts
> to/from the guest.

Why would we require any work on the RC / host side?  A properly
setup software controlled virtio device should just show up as a
normal PCIe device, and the virtio-pci device should bind to it.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:19         ` Christoph Hellwig
@ 2019-01-17 15:31           ` Arnd Bergmann
  0 siblings, 0 replies; 29+ messages in thread
From: Arnd Bergmann @ 2019-01-17 15:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Vincent Whitchurch, sudeep.dutt, ashutosh.dixit, gregkh,
	Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 4:19 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Thu, Jan 17, 2019 at 07:15:29AM -0800, Christoph Hellwig wrote:
> > On Thu, Jan 17, 2019 at 01:39:27PM +0100, Arnd Bergmann wrote:
> > > Can you describe how you expect a VOP device over NTB or
> > > PCIe-endpoint would get created, configured and used?
> > > Is there always one master side that is responsible for creating
> > > virtio devices on it, with the slave side automatically attaching to
> > > them, or can either side create virtio devices? Is there any limit on
> > > the number of virtio devices or queues within a VOP device?
> >
> > For VOP device over NTB your configure your device using configfs
> > on one side, and for the other side it will just show up like any
> > other PCIe device, because it is.
>
> Sorry, I mean over the PCI-EP infratructure of course.  NTB actually
> is rather hairy and complicated.

My understanding was that with virtio, we would be able to have multiple
virtio devices on a single PCI-EP port, so you need a multi-step
configuration: You first set up the PCI-EP to instantiate a VOP device,
which is then seen on both ends of the connection. The question
is how to create a particular virtio device instance (or a set of those)
inside of it.

      Arnd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:21         ` Christoph Hellwig
@ 2019-01-17 15:32           ` Vincent Whitchurch
  2019-01-17 15:46             ` Christoph Hellwig
  0 siblings, 1 reply; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-17 15:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Arnd Bergmann, sudeep.dutt, ashutosh.dixit, gregkh,
	Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 07:21:42AM -0800, Christoph Hellwig wrote:
> On Thu, Jan 17, 2019 at 04:19:06PM +0100, Vincent Whitchurch wrote:
> > On the RC, a vop-host-backend driver (PCI driver) sets up some shared
> > memory area which the RC and the endpoint can use to communicate the
> > location of the MIC device descriptors and other information such as the
> > MSI address.  It implements vop callbacks to allow the vop framework to
> > obtain the address of the MIC descriptors and send/receive interrupts
> > to/from the guest.
> 
> Why would we require any work on the RC / host side?  A properly
> setup software controlled virtio device should just show up as a
> normal PCIe device, and the virtio-pci device should bind to it.

If I understand you correctly, I think you're talking about the RC
running the virtio drivers and the endpoint implementing the virtio
device?  This vop stuff is used for the other way around: the virtio
device is implement on the RC and the endpoint runs the virtio drivers.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:32           ` Vincent Whitchurch
@ 2019-01-17 15:46             ` Christoph Hellwig
  2019-01-17 16:18               ` Arnd Bergmann
  0 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2019-01-17 15:46 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: Christoph Hellwig, Arnd Bergmann, sudeep.dutt, ashutosh.dixit,
	gregkh, Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 04:32:06PM +0100, Vincent Whitchurch wrote:
> If I understand you correctly, I think you're talking about the RC
> running the virtio drivers and the endpoint implementing the virtio
> device?  This vop stuff is used for the other way around: the virtio
> device is implement on the RC and the endpoint runs the virtio drivers.

Oh.  That is really weird and not that way I'd implement it..

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:19       ` Vincent Whitchurch
  2019-01-17 15:21         ` Christoph Hellwig
@ 2019-01-17 15:53         ` Arnd Bergmann
  2019-01-17 16:26           ` Vincent Whitchurch
  2019-01-17 22:17         ` Logan Gunthorpe
  2 siblings, 1 reply; 29+ messages in thread
From: Arnd Bergmann @ 2019-01-17 15:53 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe

On Thu, Jan 17, 2019 at 4:19 PM Vincent Whitchurch
<vincent.whitchurch@axis.com> wrote:
>
> On Thu, Jan 17, 2019 at 01:39:27PM +0100, Arnd Bergmann wrote:
> > Correct, and again we have to see if this is a good interface. The NTB
> > and PCIe-endpoint interfaces have a number of differences and a
> > number of similarities. In particular they should both be usable with
> > virtio-style drivers, but the underlying hardware differs mainly in how
> > it is probed by the system: an NTB is seen as a PCI device attached
> > to two host bridges, while and endpoint is typically a platform_device
> > on one side, but a pci_dev on the other side.
> >
> > Can you describe how you expect a VOP device over NTB or
> > PCIe-endpoint would get created, configured and used?
>
> Assuming PCIe-endpoint:
>
> On the RC, a vop-host-backend driver (PCI driver) sets up some shared
> memory area which the RC and the endpoint can use to communicate the
> location of the MIC device descriptors and other information such as the
> MSI address.  It implements vop callbacks to allow the vop framework to
> obtain the address of the MIC descriptors and send/receive interrupts
> to/from the guest.
>
> On the endpoint, the PCIe endpoint driver sets up (hardcoded) BARs and
> memory regions as required to allow the endpoint and the root complex to
> access each other's memory.
>
> On the endpoint, the vop-guest-backend, via the shared memory set up by
> the vop-host-backend, obtains the address of the MIC device page and the
> MSI address, and a method to receive vop interrupts from the host.  This
> information is used to implement the vop callbacks allowing the vop
> framework to access to the MIC device page and send/receive interrupts
> from/to the host.

Ok, this seems fine so far. So the vop-host-backend is a regular PCI
driver that implements the VOP protocol from the host side, and it
can talk to either a MIC, or another guest-backend written for the PCI-EP
framework to implement the same protocol, right?

> vop (despite its name) doesn't care about PCIe.  The vop-guest-backend
> doesn't actually need to talk to the PCIe endpoint driver.  The
> vop-guest-backend can be probed via any means, such as via a device tree
> on the endpoint.
>
> On the RC, userspace opens the vop device and adds the virtio devices,
> which end up in the MIC device page set up by the vop-host-backend.
>
> On the endpoint, when the vop framework (via the vop-guest-backend) sees
> these devices, it registers devices on the virtio bus and the virtio
> drivers are probed.

Ah, so the direction is fixed, and it's the opposite of what Christoph
and I were expecting. This is probably something we need to discuss
a bit. From what I understand, there is no technical requirement why
it has to be this direction, right?

What I mean is that the same vop framework could work with
a PCI-EP driver implementing the vop-host-backend and
a PCI driver implementing the vop-guest-backend? In order
to do this, the PCI-EP configuration would need to pick whether
it wants the EP to be the vop host or guest, but having more
flexibility in it (letting each side add virtio devices) would be
harder to do.

> On the RC, userspace implements the device end of the virtio
> communication in userspace, using the MIC_VIRTIO_COPY_DESC ioctl.  I
> also have patches to support vhost.

This is a part I don't understand yet. Does this mean that the
normal operation is between a user space process on the vop-host
talking to the kernel on the vop-guest?

I'm a bit worried about the ioctl interface here, as this combines the
configuration side with the actual data transfer, and that seems
a bit inflexible.

> > Is there always one master side that is responsible for creating
> > virtio devices on it, with the slave side automatically attaching to
> > them, or can either side create virtio devices?
>
> Only the master can create virtio devices.  The virtio drivers run on
> the slave.

Ok.

> > Is there any limit on
> > the number of virtio devices or queues within a VOP device?
>
> The virtio device information (mic_device_desc) is put into the MIC
> device page whose size is limited by the ABI header in
> include/uapi/linux/mic_ioctl.h (MIC_DP_SIZE, 4096 bytes).  So the number
> of devices is limited by the limit of the number of device descriptors
> that can fit in that size.  There is also a per-device limit on the
> number of vrings (MIC_VRING_ENTRIES) and vring entries
> (MIC_VRING_ENTRIES) in the ABI header.

Ok, so you can have multiple virtio devices (e.g. a virtio-net and
virtio-console) but not an arbitrary number? I suppose we can always
extend it later if that becomes a problem.

       Arnd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:46             ` Christoph Hellwig
@ 2019-01-17 16:18               ` Arnd Bergmann
  0 siblings, 0 replies; 29+ messages in thread
From: Arnd Bergmann @ 2019-01-17 16:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Vincent Whitchurch, sudeep.dutt, ashutosh.dixit, gregkh,
	Linux Kernel Mailing List, Kishon Vijay Abraham I,
	Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason, Dave Jiang,
	Allen Hubbe

On Thu, Jan 17, 2019 at 4:46 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Thu, Jan 17, 2019 at 04:32:06PM +0100, Vincent Whitchurch wrote:
> > If I understand you correctly, I think you're talking about the RC
> > running the virtio drivers and the endpoint implementing the virtio
> > device?  This vop stuff is used for the other way around: the virtio
> > device is implement on the RC and the endpoint runs the virtio drivers.
>
> Oh.  That is really weird and not that way I'd implement it..

It does make sense to me for the very special requirements of the MIC
device, which has a regular PC-style server that provides the environment
for a special embedded device inside of a PCIe card, so the PCI-EP
stuff is just used as a transport here going one way, and then the
configuration of the devices implemented through it goes the other
way, providing network connectivity and file system to the embedded
machine on the PCI-EP.

This is actually very similar to a setup that I considered implementing
over USB, where one might have an embedded machine (or a bunch
of them on a USB hub) connected to a USB host port, and then
use it in the opposite way of a regular gadget driver, by providing
a virtfs over USB to the gadget with files residing on a disk on the
USB host.

Apparently Vincent has the same use case that both the Intel
MIC folks and I had here, so doing it like this is clearly useful.
On the other hand, I agree that there are lots of other use cases
that need the opposite, so we should try to come up with a
design that can cover both.
An example of this might be a PCIe-endpoint device providing
network connectivity to the host using a vhost-net device, which
ideally just shows up as a device on the host as a virtio-net
without requiring any configuration.

So for configuring this, I think it'd like to see a way to have
either the PCI-EP or the PCI-host side be the one that can
create virtio devices that show up on the other end. This
configuration is currently done using an ioctl interface, which
was probably the easiest to do for the MIC case, but for
consistency with the PCI-EP framework, using configfs
is probably better.

A different matter is the question of what a virtio device
talks to. A lot of virtio devices are fundamentally
asymmetric (9pfs, rng, block, ...), so you'd have to
have the virtio device on one side, and a user space
or vhost driver on the other. The VOP driver seems to assume
that it's always the slave that uses virtio, while the
master side (which could be on the PCI EP or PCI
host for the sake of this argument) implements it in user
space or otherwise. Is this a safe assumption, or can
we imagine cases where this would be reversed as well?

        Arnd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:53         ` Arnd Bergmann
@ 2019-01-17 16:26           ` Vincent Whitchurch
  2019-01-17 16:34             ` Arnd Bergmann
  0 siblings, 1 reply; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-17 16:26 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe

On Thu, Jan 17, 2019 at 04:53:25PM +0100, Arnd Bergmann wrote:
> On Thu, Jan 17, 2019 at 4:19 PM Vincent Whitchurch
> <vincent.whitchurch@axis.com> wrote:
> > On Thu, Jan 17, 2019 at 01:39:27PM +0100, Arnd Bergmann wrote:
> > > Can you describe how you expect a VOP device over NTB or
> > > PCIe-endpoint would get created, configured and used?
> >
> > Assuming PCIe-endpoint:
> >
> > On the RC, a vop-host-backend driver (PCI driver) sets up some shared
> > memory area which the RC and the endpoint can use to communicate the
> > location of the MIC device descriptors and other information such as the
> > MSI address.  It implements vop callbacks to allow the vop framework to
> > obtain the address of the MIC descriptors and send/receive interrupts
> > to/from the guest.
> >
> > On the endpoint, the PCIe endpoint driver sets up (hardcoded) BARs and
> > memory regions as required to allow the endpoint and the root complex to
> > access each other's memory.
> >
> > On the endpoint, the vop-guest-backend, via the shared memory set up by
> > the vop-host-backend, obtains the address of the MIC device page and the
> > MSI address, and a method to receive vop interrupts from the host.  This
> > information is used to implement the vop callbacks allowing the vop
> > framework to access to the MIC device page and send/receive interrupts
> > from/to the host.
> 
> Ok, this seems fine so far. So the vop-host-backend is a regular PCI
> driver that implements the VOP protocol from the host side, and it
> can talk to either a MIC, or another guest-backend written for the PCI-EP
> framework to implement the same protocol, right?

Yes, but just to clarify:  the placement of the device page and the way
to communicate the location of the device page address and any other
information needed by the guest-backend are hardware-specific so there
is no generic vop-host-backend implementation which can talk to both a
MIC and to something else.

> > vop (despite its name) doesn't care about PCIe.  The vop-guest-backend
> > doesn't actually need to talk to the PCIe endpoint driver.  The
> > vop-guest-backend can be probed via any means, such as via a device tree
> > on the endpoint.
> >
> > On the RC, userspace opens the vop device and adds the virtio devices,
> > which end up in the MIC device page set up by the vop-host-backend.
> >
> > On the endpoint, when the vop framework (via the vop-guest-backend) sees
> > these devices, it registers devices on the virtio bus and the virtio
> > drivers are probed.
> 
> Ah, so the direction is fixed, and it's the opposite of what Christoph
> and I were expecting. This is probably something we need to discuss
> a bit. From what I understand, there is no technical requirement why
> it has to be this direction, right?

I don't think the vop framework itself has any such requirement.

The MIC uses it in this way (see Documentation/mic/mic_overview.txt) and
it also makes sense (to me, at least) if one wants to treat the endpoint
like one would treat a virtualized guest.

> What I mean is that the same vop framework could work with
> a PCI-EP driver implementing the vop-host-backend and
> a PCI driver implementing the vop-guest-backend? In order
> to do this, the PCI-EP configuration would need to pick whether
> it wants the EP to be the vop host or guest, but having more
> flexibility in it (letting each side add virtio devices) would be
> harder to do.

Correct, this is my understanding also.

> > On the RC, userspace implements the device end of the virtio
> > communication in userspace, using the MIC_VIRTIO_COPY_DESC ioctl.  I
> > also have patches to support vhost.
> 
> This is a part I don't understand yet. Does this mean that the
> normal operation is between a user space process on the vop-host
> talking to the kernel on the vop-guest?

Yes.  For example, the guest mounts a 9p filesystem with virtio-9p and
the 9p server is implemented in a userspace process on the host.  This
is again similar to virtualization.

> I'm a bit worried about the ioctl interface here, as this combines the
> configuration side with the actual data transfer, and that seems
> a bit inflexible.
>
> > > Is there always one master side that is responsible for creating
> > > virtio devices on it, with the slave side automatically attaching to
> > > them, or can either side create virtio devices?
> >
> > Only the master can create virtio devices.  The virtio drivers run on
> > the slave.
> 
> Ok.
> 
> > > Is there any limit on
> > > the number of virtio devices or queues within a VOP device?
> >
> > The virtio device information (mic_device_desc) is put into the MIC
> > device page whose size is limited by the ABI header in
> > include/uapi/linux/mic_ioctl.h (MIC_DP_SIZE, 4096 bytes).  So the number
> > of devices is limited by the limit of the number of device descriptors
> > that can fit in that size.  There is also a per-device limit on the
> > number of vrings (MIC_VRING_ENTRIES) and vring entries
> > (MIC_VRING_ENTRIES) in the ABI header.
> 
> Ok, so you can have multiple virtio devices (e.g. a virtio-net and
> virtio-console) but not an arbitrary number? I suppose we can always
> extend it later if that becomes a problem.

Yes.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 16:26           ` Vincent Whitchurch
@ 2019-01-17 16:34             ` Arnd Bergmann
  0 siblings, 0 replies; 29+ messages in thread
From: Arnd Bergmann @ 2019-01-17 16:34 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe

On Thu, Jan 17, 2019 at 5:26 PM Vincent Whitchurch
<vincent.whitchurch@axis.com> wrote:
> On Thu, Jan 17, 2019 at 04:53:25PM +0100, Arnd Bergmann wrote:
> > On Thu, Jan 17, 2019 at 4:19 PM Vincent Whitchurch

> > Ok, this seems fine so far. So the vop-host-backend is a regular PCI
> > driver that implements the VOP protocol from the host side, and it
> > can talk to either a MIC, or another guest-backend written for the PCI-EP
> > framework to implement the same protocol, right?
>
> Yes, but just to clarify:  the placement of the device page and the way
> to communicate the location of the device page address and any other
> information needed by the guest-backend are hardware-specific so there
> is no generic vop-host-backend implementation which can talk to both a
> MIC and to something else.

I'm not sure I understand what is hardware specific about it. Shouldn't
it be possible to define at least a vop-host-backend that could work with
any guest-backend running on the PCI-EP framework?

This may have to be different from the interface used on MIC, but
generally speaking that is what I expect from a PCI device.

       Arnd

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-17 15:19       ` Vincent Whitchurch
  2019-01-17 15:21         ` Christoph Hellwig
  2019-01-17 15:53         ` Arnd Bergmann
@ 2019-01-17 22:17         ` Logan Gunthorpe
  2 siblings, 0 replies; 29+ messages in thread
From: Logan Gunthorpe @ 2019-01-17 22:17 UTC (permalink / raw)
  To: Vincent Whitchurch, Arnd Bergmann
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, Linux Kernel Mailing List,
	Kishon Vijay Abraham I, Lorenzo Pieralisi, linux-pci, linux-ntb,
	Jon Mason, Dave Jiang, Allen Hubbe



On 2019-01-17 8:19 a.m., Vincent Whitchurch wrote:
> On the endpoint, the PCIe endpoint driver sets up (hardcoded) BARs and
> memory regions as required to allow the endpoint and the root complex to
> access each other's memory.

This statement describes NTB hardware pretty well. In essence that's
what an NTB device is: a BAR that maps to a window in other hosts memory.

Right now the entire NTB upstream software stack (ntb_transport and
ntb_netdev) is specific to that ecosystem and only exposes a network
device so the hosts can communicate. This code works but has some issues
and was never able to perform at full PCIe line speeds (which everyone
expects). So it's not clear to me if anyone is doing anything real with
it. The companies that are working on NTB, that I'm aware of, have
mostly done their own out-of-tree stuff.

It would be interesting to unify ntb_transport with the virtio stack
because I suspect they do very similar things right now and there's a
lot more devices above virtio than just a network device. However, the
main problem people working on NTB face (besides performance) is trying
to get multi-host working in a general and sensible way given that the
hardware typically has limited BAR resources (among other limitations).

Logan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (8 preceding siblings ...)
  2019-01-16 17:07 ` [PATCH 0/8] Virtio-over-PCIe on non-MIC Arnd Bergmann
@ 2019-01-18 23:49 ` Stephen Warren
  2019-01-21 16:25   ` Vincent Whitchurch
  2019-01-22 10:45 ` Greg KH
  10 siblings, 1 reply; 29+ messages in thread
From: Stephen Warren @ 2019-01-18 23:49 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, arnd, linux-kernel,
	Vincent Whitchurch, ABRAHAM, KISHON VIJAY, Lorenzo Pieralisi,
	linux-pci, linux-ntb, Jon Mason, Dave Jiang, Allen Hubbe,
	Christoph Hellwig

On 1/16/19 9:32 AM, Vincent Whitchurch wrote:
> The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
> generic framework to use virtio between two Linux systems, given shared memory
> and a couple of interrupts.  It does not actually require the Intel MIC
> hardware, x86-64, or even PCIe for that matter.  This patch series makes it
> buildable on more systems and adds a loopback driver to test it without special
> hardware.
> 
> Note that I don't have access to Intel MIC hardware so some testing of the
> patchset (especially the patch "vop: Use consistent DMA") on that platform
> would be appreciated, to ensure that the series does not break anything there.

So a while ago I took a look at running virtio over PCIe. I found virtio 
basically had two parts:

1) The protocol used to enumerate which virtio devices exist, and 
perhaps configure them.

2) The ring buffer protocol that actually transfers the data.

I recall that data transfer was purely based on simple shared memory and 
interrupts, and hence could run over PCIe (e.g. via the PCIe endpoint 
subsystem in the kernel) without issue.

However, the enumeration/configuration protocol requires the host to be 
able to do all kinds of strange things that can't possibly be emulated 
over PCIe; IIRC the configuration data contains "registers" that when 
written select the data other "registers" access. When the virtio device 
is exposed by a hypervisor, and all the accesses are emulated 
synchronously through a trap, this is easy enough to implement. However, 
if the two ends of this configuration parsing are on different ends of a 
PCIe bus, there's no way this can work.

Are you thinking of doing something different for 
enumeration/configuration, and just using the virtio ring buffer 
protocol over PCIe?

I did post asking about this quite a while back, but IIRC I didn't 
receive much of a response. Yes, here it is:

> https://lists.linuxfoundation.org/pipermail/virtualization/2018-March/037276.html
"virtio over SW-defined/CPU-driven PCIe endpoint"

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-18 23:49 ` Stephen Warren
@ 2019-01-21 16:25   ` Vincent Whitchurch
  0 siblings, 0 replies; 29+ messages in thread
From: Vincent Whitchurch @ 2019-01-21 16:25 UTC (permalink / raw)
  To: Stephen Warren
  Cc: sudeep.dutt, ashutosh.dixit, gregkh, arnd, linux-kernel, ABRAHAM,
	KISHON VIJAY, Lorenzo Pieralisi, linux-pci, linux-ntb, Jon Mason,
	Dave Jiang, Allen Hubbe, Christoph Hellwig, virtualization

On Fri, Jan 18, 2019 at 04:49:16PM -0700, Stephen Warren wrote:
> On 1/16/19 9:32 AM, Vincent Whitchurch wrote:
> > The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
> > generic framework to use virtio between two Linux systems, given shared memory
> > and a couple of interrupts.  It does not actually require the Intel MIC
> > hardware, x86-64, or even PCIe for that matter.  This patch series makes it
> > buildable on more systems and adds a loopback driver to test it without special
> > hardware.
> > 
> > Note that I don't have access to Intel MIC hardware so some testing of the
> > patchset (especially the patch "vop: Use consistent DMA") on that platform
> > would be appreciated, to ensure that the series does not break anything there.
> 
> So a while ago I took a look at running virtio over PCIe. I found virtio
> basically had two parts:
> 
> 1) The protocol used to enumerate which virtio devices exist, and perhaps
> configure them.
> 
> 2) The ring buffer protocol that actually transfers the data.
> 
> I recall that data transfer was purely based on simple shared memory and
> interrupts, and hence could run over PCIe (e.g. via the PCIe endpoint
> subsystem in the kernel) without issue.
> 
> However, the enumeration/configuration protocol requires the host to be able
> to do all kinds of strange things that can't possibly be emulated over PCIe;
> IIRC the configuration data contains "registers" that when written select
> the data other "registers" access. When the virtio device is exposed by a
> hypervisor, and all the accesses are emulated synchronously through a trap,
> this is easy enough to implement. However, if the two ends of this
> configuration parsing are on different ends of a PCIe bus, there's no way
> this can work.

Correct, and that's why the MIC "Virtio-over-PCIe framework" does not
try to implement the standard "Virtio Over PCI Bus".  (Yes, it's
confusing.)

> Are you thinking of doing something different for enumeration/configuration,
> and just using the virtio ring buffer protocol over PCIe?

The mic/vop code already does this.  See
Documentation/mic/mic_overview.txt for some information.

> I did post asking about this quite a while back, but IIRC I didn't receive
> much of a response. Yes, here it is:
> 
> > https://lists.linuxfoundation.org/pipermail/virtualization/2018-March/037276.html
> "virtio over SW-defined/CPU-driven PCIe endpoint"

I came to essentialy the same conclusions before I found the MIC code.

(Your "aside" in that email about virtio doing PCIe reads instead of
 writes is not solved by the MIC code, since that is how the standard
 virtio devices/drivers work.)

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 2/8] vop: Cast pointers to uintptr_t
  2019-01-16 16:32 ` [PATCH 2/8] vop: Cast pointers to uintptr_t Vincent Whitchurch
  2019-01-16 17:39   ` Joe Perches
@ 2019-01-22 10:41   ` Greg KH
  1 sibling, 0 replies; 29+ messages in thread
From: Greg KH @ 2019-01-22 10:41 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, arnd, linux-kernel, Vincent Whitchurch

On Wed, Jan 16, 2019 at 05:32:47PM +0100, Vincent Whitchurch wrote:
> Fix these on 32-bit:
> 
>  vop_vringh.c:711:13: error: cast from pointer to integer of different
>  size [-Werror=pointer-to-int-cast]
> 
> Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
> ---
>  drivers/misc/mic/vop/vop_main.c   | 8 ++++----
>  drivers/misc/mic/vop/vop_vringh.c | 4 ++--
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
> index 6b212c8b78e7..5a366cf68d95 100644
> --- a/drivers/misc/mic/vop/vop_main.c
> +++ b/drivers/misc/mic/vop/vop_main.c
> @@ -497,7 +497,7 @@ static int _vop_add_device(struct mic_device_desc __iomem *d,
>  	vdev->desc = d;
>  	vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
>  	vdev->dnode = dnode;
> -	vdev->vdev.priv = (void *)(u64)dnode;
> +	vdev->vdev.priv = (void *)(uintptr_t)dnode;

As Joe said, use "unsigned long".  uintptr_t is a userspace type, not a
kernel type.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH 0/8] Virtio-over-PCIe on non-MIC
  2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
                   ` (9 preceding siblings ...)
  2019-01-18 23:49 ` Stephen Warren
@ 2019-01-22 10:45 ` Greg KH
  10 siblings, 0 replies; 29+ messages in thread
From: Greg KH @ 2019-01-22 10:45 UTC (permalink / raw)
  To: Vincent Whitchurch
  Cc: sudeep.dutt, ashutosh.dixit, arnd, linux-kernel, Vincent Whitchurch

On Wed, Jan 16, 2019 at 05:32:45PM +0100, Vincent Whitchurch wrote:
> The Virtio-over-PCIe framework living under drivers/misc/mic/vop implements a
> generic framework to use virtio between two Linux systems, given shared memory
> and a couple of interrupts.  It does not actually require the Intel MIC
> hardware, x86-64, or even PCIe for that matter.  This patch series makes it
> buildable on more systems and adds a loopback driver to test it without special
> hardware.
> 
> Note that I don't have access to Intel MIC hardware so some testing of the
> patchset (especially the patch "vop: Use consistent DMA") on that platform
> would be appreciated, to ensure that the series does not break anything there.
> 
> Vincent Whitchurch (8):
>   vop: Use %z for size_t
>   vop: Cast pointers to uintptr_t
>   vop: Add definition of readq/writeq if missing
>   vop: Allow building on more systems
>   vop: vringh: Do not crash if no DMA channel
>   vop: Fix handling of >32 feature bits
>   vop: Use consistent DMA
>   vop: Add loopback

I applied a few of these to my tree.  Feel free to rebase and fix up
patch 2 and resend.

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2019-01-22 10:45 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-16 16:32 [PATCH 0/8] Virtio-over-PCIe on non-MIC Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 1/8] vop: Use %z for size_t Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 2/8] vop: Cast pointers to uintptr_t Vincent Whitchurch
2019-01-16 17:39   ` Joe Perches
2019-01-22 10:41   ` Greg KH
2019-01-16 16:32 ` [PATCH 3/8] vop: Add definition of readq/writeq if missing Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 4/8] vop: Allow building on more systems Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 5/8] vop: vringh: Do not crash if no DMA channel Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 6/8] vop: Fix handling of >32 feature bits Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 7/8] vop: Use consistent DMA Vincent Whitchurch
2019-01-16 16:32 ` [PATCH 8/8] vop: Add loopback Vincent Whitchurch
2019-01-16 17:07 ` [PATCH 0/8] Virtio-over-PCIe on non-MIC Arnd Bergmann
2019-01-17 10:54   ` Vincent Whitchurch
2019-01-17 12:39     ` Arnd Bergmann
2019-01-17 15:15       ` Christoph Hellwig
2019-01-17 15:19         ` Christoph Hellwig
2019-01-17 15:31           ` Arnd Bergmann
2019-01-17 15:19       ` Vincent Whitchurch
2019-01-17 15:21         ` Christoph Hellwig
2019-01-17 15:32           ` Vincent Whitchurch
2019-01-17 15:46             ` Christoph Hellwig
2019-01-17 16:18               ` Arnd Bergmann
2019-01-17 15:53         ` Arnd Bergmann
2019-01-17 16:26           ` Vincent Whitchurch
2019-01-17 16:34             ` Arnd Bergmann
2019-01-17 22:17         ` Logan Gunthorpe
2019-01-18 23:49 ` Stephen Warren
2019-01-21 16:25   ` Vincent Whitchurch
2019-01-22 10:45 ` Greg KH

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).